sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 11411254: SECCOMP-BPF: Added supported for inspection system call arguments from BPF filters.

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11411254: SECCOMP-BPF: Added supported for inspection system call arguments from BPF filters. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Changelist is ready for initial review; still needs a few TODO()s to be resolved, though. Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« sandbox/linux/seccomp-bpf/sandbox_bpf.h ('K') | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('j') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <endian.h>	5 #include <endian.h>

6 #if __BYTE_ORDER == __BIG_ENDIAN	6 #if __BYTE_ORDER == __BIG_ENDIAN

7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit	7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit

8 // values that need to be inspected by a virtual machine that only ever	8 // values that need to be inspected by a virtual machine that only ever

9 // operates on 32bit values. The kernel developers decided how values	9 // operates on 32bit values. The kernel developers decided how values

10 // should be split into two 32bit words to achieve this goal. But at this	10 // should be split into two 32bit words to achieve this goal. But at this

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
65	65

66 // The kernel gives us a sandbox, we turn it into a playground :-)	66 // The kernel gives us a sandbox, we turn it into a playground :-)

67 // This is version 2 of the playground; version 1 was built on top of	67 // This is version 2 of the playground; version 1 was built on top of

68 // pre-BPF seccomp mode.	68 // pre-BPF seccomp mode.

69 namespace playground2 {	69 namespace playground2 {

70	70

71 const int kExpectedExitCode = 100;	71 const int kExpectedExitCode = 100;

72	72

73 // We define a really simple sandbox policy. It is just good enough for us	73 // We define a really simple sandbox policy. It is just good enough for us

74 // to tell that the sandbox has actually been activated.	74 // to tell that the sandbox has actually been activated.

75 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) {	75 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {

76 switch (sysnum) {	76 switch (sysnum) {

77 case __NR_getpid:	77 case __NR_getpid:

78 // Return EPERM so that we can check that the filter actually ran.	78 // Return EPERM so that we can check that the filter actually ran.

79 return ErrorCode(EPERM);	79 return ErrorCode(EPERM);

80 case __NR_exit_group:	80 case __NR_exit_group:

81 // Allow exit() with a non-default return code.	81 // Allow exit() with a non-default return code.

82 return ErrorCode(ErrorCode::ERR_ALLOWED);	82 return ErrorCode(ErrorCode::ERR_ALLOWED);

83 default:	83 default:

84 // Make everything else fail in an easily recognizable way.	84 // Make everything else fail in an easily recognizable way.

85 return ErrorCode(EINVAL);	85 return ErrorCode(EINVAL);

86 }	86 }

87 }	87 }

88	88

89 void Sandbox::probeProcess(void) {	89 void Sandbox::ProbeProcess(void) {

90 if (syscall(__NR_getpid) < 0 && errno == EPERM) {	90 if (syscall(__NR_getpid) < 0 && errno == EPERM) {

91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

92 }	92 }

93 }	93 }

94	94

95 bool Sandbox::isValidSyscallNumber(int sysnum) {	95 bool Sandbox::IsValidSyscallNumber(int sysnum) {

96 return SyscallIterator::IsValid(sysnum);	96 return SyscallIterator::IsValid(sysnum);

97 }	97 }

98	98

99 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) {	99 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {

100 if (!isValidSyscallNumber(sysnum)) {	100 if (!IsValidSyscallNumber(sysnum)) {

101 return ErrorCode(ENOSYS);	101 return ErrorCode(ENOSYS);

102 }	102 }

103 return ErrorCode(ErrorCode::ERR_ALLOWED);	103 return ErrorCode(ErrorCode::ERR_ALLOWED);

104 }	104 }

105	105

106 void Sandbox::tryVsyscallProcess(void) {	106 void Sandbox::TryVsyscallProcess(void) {

107 time_t current_time;	107 time_t current_time;

108 // time() is implemented as a vsyscall. With an older glibc, with	108 // time() is implemented as a vsyscall. With an older glibc, with

109 // vsyscall=emulate and some versions of the seccomp BPF patch	109 // vsyscall=emulate and some versions of the seccomp BPF patch

110 // we may get SIGKILL-ed. Detect this!	110 // we may get SIGKILL-ed. Detect this!

111 if (time(&current_time) != static_cast<time_t>(-1)) {	111 if (time(&current_time) != static_cast<time_t>(-1)) {

112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

113 }	113 }

114 }	114 }

115	115

116 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(),	116 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),

117 EvaluateSyscall syscallEvaluator,	117 EvaluateSyscall syscall_evaluator,

118 void *aux,	118 void *aux,

119 int proc_fd) {	119 int proc_fd) {

120 // Block all signals before forking a child process. This prevents an	120 // Block all signals before forking a child process. This prevents an

121 // attacker from manipulating our test by sending us an unexpected signal.	121 // attacker from manipulating our test by sending us an unexpected signal.

122 sigset_t oldMask, newMask;	122 sigset_t old_mask, new_mask;

123 if (sigfillset(&newMask) ||	123 if (sigfillset(&new_mask) ||

124 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {	124 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {

125 SANDBOX_DIE("sigprocmask() failed");	125 SANDBOX_DIE("sigprocmask() failed");

126 }	126 }

127 int fds[2];	127 int fds[2];

128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {	128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {

129 SANDBOX_DIE("pipe() failed");	129 SANDBOX_DIE("pipe() failed");

130 }	130 }

131	131

132 if (fds[0] <= 2 || fds[1] <= 2) {	132 if (fds[0] <= 2 || fds[1] <= 2) {

133 SANDBOX_DIE("Process started without standard file descriptors");	133 SANDBOX_DIE("Process started without standard file descriptors");

134 }	134 }

135	135

136 pid_t pid = fork();	136 pid_t pid = fork();

137 if (pid < 0) {	137 if (pid < 0) {

138 // Die if we cannot fork(). We would probably fail a little later	138 // Die if we cannot fork(). We would probably fail a little later

139 // anyway, as the machine is likely very close to running out of	139 // anyway, as the machine is likely very close to running out of

140 // memory.	140 // memory.

141 // But what we don't want to do is return "false", as a crafty	141 // But what we don't want to do is return "false", as a crafty

142 // attacker might cause fork() to fail at will and could trick us	142 // attacker might cause fork() to fail at will and could trick us

143 // into running without a sandbox.	143 // into running without a sandbox.

144 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails	144 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails

145 SANDBOX_DIE("fork() failed unexpectedly");	145 SANDBOX_DIE("fork() failed unexpectedly");

146 }	146 }

147	147

148 // In the child process	148 // In the child process

149 if (!pid) {	149 if (!pid) {

150 // Test a very simple sandbox policy to verify that we can	150 // Test a very simple sandbox policy to verify that we can

151 // successfully turn on sandboxing.	151 // successfully turn on sandboxing.

152 Die::EnableSimpleExit();	152 Die::EnableSimpleExit();

153	153

154 if (HANDLE_EINTR(close(fds[0]))) {	154 if (HANDLE_EINTR(close(fds[0]))) {

155 WriteFailedStderrSetupMessage(fds[1]);	155 WriteFailedStderrSetupMessage(fds[1]);

156 SANDBOX_DIE(NULL);	156 SANDBOX_DIE(NULL);

157 }	157 }

158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {	158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {

159 // Stderr could very well be a file descriptor to .xsession-errors, or	159 // Stderr could very well be a file descriptor to .xsession-errors, or

160 // another file, which could be backed by a file system that could cause	160 // another file, which could be backed by a file system that could cause

161 // dup2 to fail while trying to close stderr. It's important that we do	161 // dup2 to fail while trying to close stderr. It's important that we do

162 // not fail on trying to close stderr.	162 // not fail on trying to close stderr.

163 // If dup2 fails here, we will continue normally, this means that our	163 // If dup2 fails here, we will continue normally, this means that our

164 // parent won't cause a fatal failure if something writes to stderr in	164 // parent won't cause a fatal failure if something writes to stderr in

165 // this child.	165 // this child.

166 }	166 }

167 if (HANDLE_EINTR(close(fds[1]))) {	167 if (HANDLE_EINTR(close(fds[1]))) {

168 WriteFailedStderrSetupMessage(fds[1]);	168 WriteFailedStderrSetupMessage(fds[1]);

169 SANDBOX_DIE(NULL);	169 SANDBOX_DIE(NULL);

170 }	170 }

171	171

172 evaluators_.clear();	172 evaluators_.clear();

173 setSandboxPolicy(syscallEvaluator, aux);	173 SetSandboxPolicy(syscall_evaluator, aux);

174 setProcFd(proc_fd);	174 SetProcFd(proc_fd);

175	175

176 // By passing "quiet=true" to "startSandboxInternal()" we suppress	176 // By passing "quiet=true" to "startSandboxInternal()" we suppress

177 // messages for expected and benign failures (e.g. if the current	177 // messages for expected and benign failures (e.g. if the current

178 // kernel lacks support for BPF filters).	178 // kernel lacks support for BPF filters).

179 startSandboxInternal(true);	179 StartSandboxInternal(true);

180	180

181 // Run our code in the sandbox.	181 // Run our code in the sandbox.

182 CodeInSandbox();	182 code_in_sandbox();

183	183

184 // CodeInSandbox() is not supposed to return here.	184 // code_in_sandbox() is not supposed to return here.

185 SANDBOX_DIE(NULL);	185 SANDBOX_DIE(NULL);

186 }	186 }

187	187

188 // In the parent process.	188 // In the parent process.

189 if (HANDLE_EINTR(close(fds[1]))) {	189 if (HANDLE_EINTR(close(fds[1]))) {

190 SANDBOX_DIE("close() failed");	190 SANDBOX_DIE("close() failed");

191 }	191 }

192 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {	192 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {

193 SANDBOX_DIE("sigprocmask() failed");	193 SANDBOX_DIE("sigprocmask() failed");

194 }	194 }

195 int status;	195 int status;

196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {	196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {

197 SANDBOX_DIE("waitpid() failed unexpectedly");	197 SANDBOX_DIE("waitpid() failed unexpectedly");

198 }	198 }

199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;	199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;

200	200

201 // If we fail to support sandboxing, there might be an additional	201 // If we fail to support sandboxing, there might be an additional

202 // error message. If so, this was an entirely unexpected and fatal	202 // error message. If so, this was an entirely unexpected and fatal

(...skipping 11 matching lines...) Expand all Loading...
214 SANDBOX_DIE(buf);	214 SANDBOX_DIE(buf);

215 }	215 }

216 }	216 }

217 if (HANDLE_EINTR(close(fds[0]))) {	217 if (HANDLE_EINTR(close(fds[0]))) {

218 SANDBOX_DIE("close() failed");	218 SANDBOX_DIE("close() failed");

219 }	219 }

220	220

221 return rc;	221 return rc;

222 }	222 }

223	223

224 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {	224 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {

225 #if defined(SECCOMP_BPF_VALGRIND_HACKS)	225 #if defined(SECCOMP_BPF_VALGRIND_HACKS)

226 if (RUNNING_ON_VALGRIND) {	226 if (RUNNING_ON_VALGRIND) {

227 // Valgrind doesn't like our run-time test. Disable testing and assume we	227 // Valgrind doesn't like our run-time test. Disable testing and assume we

228 // always support sandboxing. This feature should only ever be enabled when	228 // always support sandboxing. This feature should only ever be enabled when

229 // debugging.	229 // debugging.

230 return true;	230 return true;

231 }	231 }

232 #endif	232 #endif

233	233

234 return	234 return

235 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) &&	235 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&

236 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0,	236 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,

237 proc_fd);	237 proc_fd);

238 }	238 }

239	239

240 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {	240 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {

241 // If the sandbox is currently active, we clearly must have support for	241 // If the sandbox is currently active, we clearly must have support for

242 // sandboxing.	242 // sandboxing.

243 if (status_ == STATUS_ENABLED) {	243 if (status_ == STATUS_ENABLED) {

244 return status_;	244 return status_;

245 }	245 }

246	246

247 // Even if the sandbox was previously available, something might have	247 // Even if the sandbox was previously available, something might have

248 // changed in our run-time environment. Check one more time.	248 // changed in our run-time environment. Check one more time.

249 if (status_ == STATUS_AVAILABLE) {	249 if (status_ == STATUS_AVAILABLE) {

250 if (!isSingleThreaded(proc_fd)) {	250 if (!IsSingleThreaded(proc_fd)) {

251 status_ = STATUS_UNAVAILABLE;	251 status_ = STATUS_UNAVAILABLE;

252 }	252 }

253 return status_;	253 return status_;

254 }	254 }

255	255

256 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {	256 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) {

257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately	257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately

258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all	258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all

259 // happen, if and only if they are triggered by the process being multi-	259 // happen, if and only if they are triggered by the process being multi-

260 // threaded.	260 // threaded.

261 // In other words, if a single-threaded process is currently in the	261 // In other words, if a single-threaded process is currently in the

262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is	262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is

263 // actually available.	263 // actually available.

264 status_ = STATUS_AVAILABLE;	264 status_ = STATUS_AVAILABLE;

265 return status_;	265 return status_;

266 }	266 }

267	267

268 // If we have not previously checked for availability of the sandbox or if	268 // If we have not previously checked for availability of the sandbox or if

269 // we otherwise don't believe to have a good cached value, we have to	269 // we otherwise don't believe to have a good cached value, we have to

270 // perform a thorough check now.	270 // perform a thorough check now.

271 if (status_ == STATUS_UNKNOWN) {	271 if (status_ == STATUS_UNKNOWN) {

272 status_ = kernelSupportSeccompBPF(proc_fd)	272 status_ = KernelSupportSeccompBPF(proc_fd)

273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;	273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

274	274

275 // As we are performing our tests from a child process, the run-time	275 // As we are performing our tests from a child process, the run-time

276 // environment that is visible to the sandbox is always guaranteed to be	276 // environment that is visible to the sandbox is always guaranteed to be

277 // single-threaded. Let's check here whether the caller is single-	277 // single-threaded. Let's check here whether the caller is single-

278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.	278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.

279 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {	279 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {

280 status_ = STATUS_UNAVAILABLE;	280 status_ = STATUS_UNAVAILABLE;

281 }	281 }

282 }	282 }

283 return status_;	283 return status_;

284 }	284 }

285	285

286 void Sandbox::setProcFd(int proc_fd) {	286 void Sandbox::SetProcFd(int proc_fd) {

287 proc_fd_ = proc_fd;	287 proc_fd_ = proc_fd;

288 }	288 }

289	289

290 void Sandbox::startSandboxInternal(bool quiet) {	290 void Sandbox::StartSandboxInternal(bool quiet) {

291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {	291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {

292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "	292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "

293 "unavailable");	293 "unavailable");

294 } else if (status_ == STATUS_ENABLED) {	294 } else if (status_ == STATUS_ENABLED) {

295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "	295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "

296 "setSandboxPolicy() to stack policies instead");	296 "setSandboxPolicy() to stack policies instead");

297 }	297 }

298 if (proc_fd_ < 0) {	298 if (proc_fd_ < 0) {

299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);	299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);

300 }	300 }

301 if (proc_fd_ < 0) {	301 if (proc_fd_ < 0) {

302 // For now, continue in degraded mode, if we can't access /proc.	302 // For now, continue in degraded mode, if we can't access /proc.

303 // In the future, we might want to tighten this requirement.	303 // In the future, we might want to tighten this requirement.

304 }	304 }

305 if (!isSingleThreaded(proc_fd_)) {	305 if (!IsSingleThreaded(proc_fd_)) {

306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");	306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");

307 }	307 }

308	308

309 // We no longer need access to any files in /proc. We want to do this	309 // We no longer need access to any files in /proc. We want to do this

310 // before installing the filters, just in case that our policy denies	310 // before installing the filters, just in case that our policy denies

311 // close().	311 // close().

312 if (proc_fd_ >= 0) {	312 if (proc_fd_ >= 0) {

313 if (HANDLE_EINTR(close(proc_fd_))) {	313 if (HANDLE_EINTR(close(proc_fd_))) {

314 SANDBOX_DIE("Failed to close file descriptor for /proc");	314 SANDBOX_DIE("Failed to close file descriptor for /proc");

315 }	315 }

316 proc_fd_ = -1;	316 proc_fd_ = -1;

317 }	317 }

318	318

319 // Install the filters.	319 // Install the filters.

320 installFilter(quiet);	320 InstallFilter(quiet);

321	321

322 // We are now inside the sandbox.	322 // We are now inside the sandbox.

323 status_ = STATUS_ENABLED;	323 status_ = STATUS_ENABLED;

324 }	324 }

325	325

326 bool Sandbox::isSingleThreaded(int proc_fd) {	326 bool Sandbox::IsSingleThreaded(int proc_fd) {

327 if (proc_fd < 0) {	327 if (proc_fd < 0) {

328 // Cannot determine whether program is single-threaded. Hope for	328 // Cannot determine whether program is single-threaded. Hope for

329 // the best...	329 // the best...

330 return true;	330 return true;

331 }	331 }

332	332

333 struct stat sb;	333 struct stat sb;

334 int task = -1;	334 int task = -1;

335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||	335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

336 fstat(task, &sb) != 0 ||	336 fstat(task, &sb) != 0 ||

337 sb.st_nlink != 3 ||	337 sb.st_nlink != 3 ||

338 HANDLE_EINTR(close(task))) {	338 HANDLE_EINTR(close(task))) {

339 if (task >= 0) {	339 if (task >= 0) {

340 if (HANDLE_EINTR(close(task))) { }	340 if (HANDLE_EINTR(close(task))) { }

341 }	341 }

342 return false;	342 return false;

343 }	343 }

344 return true;	344 return true;

345 }	345 }

346	346

347 bool Sandbox::isDenied(const ErrorCode& code) {	347 bool Sandbox::IsDenied(const ErrorCode& code) {

348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||	348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&	349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));	350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

351 }	351 }

352	352

353 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,	353 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,

354 void *aux) {	354 void *aux) {

355 for (SyscallIterator iter(true); !iter.Done(); ) {	355 for (SyscallIterator iter(true); !iter.Done(); ) {

356 uint32_t sysnum = iter.Next();	356 uint32_t sysnum = iter.Next();

357 if (!isDenied(syscallEvaluator(sysnum, aux))) {	357 if (!IsDenied(syscall_evaluator(sysnum, aux))) {

358 SANDBOX_DIE("Policies should deny system calls that are outside the "	358 SANDBOX_DIE("Policies should deny system calls that are outside the "

359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");	359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");

360 }	360 }

361 }	361 }

362 return;	362 return;

363 }	363 }

364	364

365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {	365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

366 if (BPF_CLASS(insn->code) == BPF_RET &&	366 if (BPF_CLASS(insn->code) == BPF_RET &&

367 insn->k > SECCOMP_RET_TRAP &&	367 insn->k > SECCOMP_RET_TRAP &&

368 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) {	368 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) {

369 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];	369 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1];

370 if (!err.safe_) {	370 if (!err.safe_) {

371 bool *is_unsafe = static_cast<bool *>(aux);	371 bool *is_unsafe = static_cast<bool *>(aux);

372 *is_unsafe = true;	372 *is_unsafe = true;

373 }	373 }

374 }	374 }

375 }	375 }

376	376

377 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) {	377 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {

378 // When inside an UnsafeTrap() callback, we want to allow all system calls.	378 // When inside an UnsafeTrap() callback, we want to allow all system calls.

379 // This means, we must conditionally disable the sandbox -- and that's not	379 // This means, we must conditionally disable the sandbox -- and that's not

380 // something that kernel-side BPF filters can do, as they cannot inspect	380 // something that kernel-side BPF filters can do, as they cannot inspect

381 // any state other than the syscall arguments.	381 // any state other than the syscall arguments.

382 // But if we redirect all error handlers to user-space, then we can easily	382 // But if we redirect all error handlers to user-space, then we can easily

383 // make this decision.	383 // make this decision.

384 // The performance penalty for this extra round-trip to user-space is not	384 // The performance penalty for this extra round-trip to user-space is not

385 // actually that bad, as we only ever pay it for denied system calls; and a	385 // actually that bad, as we only ever pay it for denied system calls; and a

386 // typical program has very few of these.	386 // typical program has very few of these.

387 if (BPF_CLASS(insn->code) == BPF_RET &&	387 if (BPF_CLASS(insn->code) == BPF_RET &&

388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {	388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

389 insn->k = Trap(ReturnErrno,	389 insn->k = Trap(ReturnErrno,

390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();	390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

391 }	391 }

392 }	392 }

393	393

394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {	394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {

395 // We need to replicate the behavior of RedirectToUserspace(), so that our	395 // We need to replicate the behavior of RedirectToUserspace(), so that our

396 // Verifier can still work correctly.	396 // Verifier can still work correctly.

397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);	397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);

398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();	398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();

399 ErrorCode err = evaluator.first(sysnum, evaluator.second);	399 ErrorCode err = evaluator.first(sysnum, evaluator.second);

400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {	400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

401 return Trap(ReturnErrno,	401 return Trap(ReturnErrno,

402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));	402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

403 }	403 }

404 return err;	404 return err;

405 }	405 }

406	406

407 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {	407 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {

408 if (status_ == STATUS_ENABLED) {	408 if (status_ == STATUS_ENABLED) {

409 SANDBOX_DIE("Cannot change policy after sandbox has started");	409 SANDBOX_DIE("Cannot change policy after sandbox has started");

410 }	410 }

411 policySanityChecks(syscallEvaluator, aux);	411 PolicySanityChecks(syscall_evaluator, aux);

412 evaluators_.push_back(std::make_pair(syscallEvaluator, aux));	412 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));

413 }	413 }

414	414

415 void Sandbox::installFilter(bool quiet) {	415 void Sandbox::InstallFilter(bool quiet) {

416 // Verify that the user pushed a policy.	416 // Verify that the user pushed a policy.

417 if (evaluators_.empty()) {	417 if (evaluators_.empty()) {

418 filter_failed:	418 filter_failed:

419 SANDBOX_DIE("Failed to configure system call filters");	419 SANDBOX_DIE("Failed to configure system call filters");

420 }	420 }

421	421

422 // Set new SIGSYS handler	422 // Set new SIGSYS handler

423 struct sigaction sa;	423 struct sigaction sa;

424 memset(&sa, 0, sizeof(sa));	424 memset(&sa, 0, sizeof(sa));

425 sa.sa_sigaction = sigSys;	425 sa.sa_sigaction = SigSys;

426 sa.sa_flags = SA_SIGINFO | SA_NODEFER;	426 sa.sa_flags = SA_SIGINFO | SA_NODEFER;

427 if (sigaction(SIGSYS, &sa, NULL) < 0) {	427 if (sigaction(SIGSYS, &sa, NULL) < 0) {

428 goto filter_failed;	428 goto filter_failed;

429 }	429 }

430	430

431 // Unmask SIGSYS	431 // Unmask SIGSYS

432 sigset_t mask;	432 sigset_t mask;

433 if (sigemptyset(&mask) ||	433 if (sigemptyset(&mask) ||

434 sigaddset(&mask, SIGSYS) ||	434 sigaddset(&mask, SIGSYS) ||

435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {	435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {

(...skipping 15 matching lines...) Expand all Loading...
451 // If the architecture doesn't match SECCOMP_ARCH, disallow the	451 // If the architecture doesn't match SECCOMP_ARCH, disallow the

452 // system call.	452 // system call.

453 Instruction *tail;	453 Instruction *tail;

454 Instruction *head =	454 Instruction *head =

455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

456 offsetof(struct arch_seccomp_data, arch),	456 offsetof(struct arch_seccomp_data, arch),

457 tail =	457 tail =

458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,	458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,

459 NULL,	459 NULL,

460 gen->MakeInstruction(BPF_RET+BPF_K,	460 gen->MakeInstruction(BPF_RET+BPF_K,

461 Kill(	461 Kill("Invalid audit architecture in BPF filter"))));

462 "Invalid audit architecture in BPF filter").err_)));

463	462

464 {	463 {

465 // Evaluate all possible system calls and group their ErrorCodes into	464 // Evaluate all possible system calls and group their ErrorCodes into

466 // ranges of identical codes.	465 // ranges of identical codes.

467 Ranges ranges;	466 Ranges ranges;

468 findRanges(&ranges);	467 FindRanges(&ranges);

469	468

470 // Compile the system call ranges to an optimized BPF jumptable	469 // Compile the system call ranges to an optimized BPF jumptable

471 Instruction *jumptable =	470 Instruction *jumptable =

472 assembleJumpTable(gen, ranges.begin(), ranges.end());	471 AssembleJumpTable(gen, ranges.begin(), ranges.end());

473	472

474 // If there is at least one UnsafeTrap() in our program, the entire sandbox	473 // If there is at least one UnsafeTrap() in our program, the entire sandbox

475 // is unsafe. We need to modify the program so that all non-	474 // is unsafe. We need to modify the program so that all non-

476 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then	475 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then

477 // allow us to temporarily disable sandboxing rules inside of callbacks to	476 // allow us to temporarily disable sandboxing rules inside of callbacks to

478 // UnsafeTrap().	477 // UnsafeTrap().

479 has_unsafe_traps_ = false;	478 has_unsafe_traps_ = false;

480 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);	479 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);

481	480

482 // Grab the system call number, so that we can implement jump tables.	481 // Grab the system call number, so that we can implement jump tables.

(...skipping 135 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
618 // system memory allocator that is in effect, these operators can result	617 // system memory allocator that is in effect, these operators can result

619 // in system calls to things like munmap() or brk().	618 // in system calls to things like munmap() or brk().

620 struct sock_filter bpf[program->size()];	619 struct sock_filter bpf[program->size()];

621 const struct sock_fprog prog = {	620 const struct sock_fprog prog = {

622 static_cast<unsigned short>(program->size()), bpf };	621 static_cast<unsigned short>(program->size()), bpf };

623 memcpy(bpf, &(*program)[0], sizeof(bpf));	622 memcpy(bpf, &(*program)[0], sizeof(bpf));

624 delete program;	623 delete program;

625	624

626 // Release memory that is no longer needed	625 // Release memory that is no longer needed

627 evaluators_.clear();	626 evaluators_.clear();

	627 conds_.clear();

628	628

629 #if defined(SECCOMP_BPF_VALGRIND_HACKS)	629 #if defined(SECCOMP_BPF_VALGRIND_HACKS)

630 // Valgrind is really not happy about our sandbox. Disable it when running	630 // Valgrind is really not happy about our sandbox. Disable it when running

631 // in Valgrind. This feature is dangerous and should never be enabled by	631 // in Valgrind. This feature is dangerous and should never be enabled by

632 // default. We protect it behind a pre-processor option.	632 // default. We protect it behind a pre-processor option.

633 if (!RUNNING_ON_VALGRIND)	633 if (!RUNNING_ON_VALGRIND)

634 #endif	634 #endif

635 {	635 {

636 // Install BPF filter program	636 // Install BPF filter program

637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {	637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {

638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");	638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");

639 } else {	639 } else {

640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {	640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");	641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");

642 }	642 }

643 }	643 }

644 }	644 }

645	645

646 return;	646 return;

647 }	647 }

648	648

649 void Sandbox::findRanges(Ranges *ranges) {	649 void Sandbox::FindRanges(Ranges *ranges) {

650 // Please note that "struct seccomp_data" defines system calls as a signed	650 // Please note that "struct seccomp_data" defines system calls as a signed

651 // int32_t, but BPF instructions always operate on unsigned quantities. We	651 // int32_t, but BPF instructions always operate on unsigned quantities. We

652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,	652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

653 // and then verifying that the rest of the number range (both positive and	653 // and then verifying that the rest of the number range (both positive and

654 // negative) all return the same ErrorCode.	654 // negative) all return the same ErrorCode.

655 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	655 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;

656 void *aux = evaluators_.begin()->second;	656 void *aux = evaluators_.begin()->second;

657 uint32_t oldSysnum = 0;	657 uint32_t old_sysnum = 0;

658 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux);	658 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);

659 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux);	659 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);

660 for (SyscallIterator iter(false); !iter.Done(); ) {	660 for (SyscallIterator iter(false); !iter.Done(); ) {

661 uint32_t sysnum = iter.Next();	661 uint32_t sysnum = iter.Next();

662 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux);	662 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);

663 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) {	663 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {

664 // A proper sandbox policy should always treat system calls outside of	664 // A proper sandbox policy should always treat system calls outside of

665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns	665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns

666 // "false" for SyscallIterator::IsValid()) identically. Typically, all	666 // "false" for SyscallIterator::IsValid()) identically. Typically, all

667 // of these system calls would be denied with the same ErrorCode.	667 // of these system calls would be denied with the same ErrorCode.

668 SANDBOX_DIE("Invalid seccomp policy");	668 SANDBOX_DIE("Invalid seccomp policy");

669 }	669 }

670 if (!err.Equals(oldErr) \|\| iter.Done()) {	670 if (!err.Equals(old_err) \|\| iter.Done()) {

671 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr));	671 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));

672 oldSysnum = sysnum;	672 old_sysnum = sysnum;

673 oldErr = err;	673 old_err = err;

674 }	674 }

675 }	675 }

676 }	676 }

677	677

678 Instruction Sandbox::assembleJumpTable(CodeGen gen,	678 Instruction Sandbox::AssembleJumpTable(CodeGen gen,

679 Ranges::const_iterator start,	679 Ranges::const_iterator start,

680 Ranges::const_iterator stop) {	680 Ranges::const_iterator stop) {

681 // We convert the list of system call ranges into a jump table that performs	681 // We convert the list of system call ranges into a jump table that performs

682 // a binary search over the ranges.	682 // a binary search over the ranges.

683 // As a sanity check, we need to have at least one distinct range for us	683 // As a sanity check, we need to have at least one distinct range for us

684 // to be able to build a jump table.	684 // to be able to build a jump table.

685 if (stop - start <= 0) {	685 if (stop - start <= 0) {

686 SANDBOX_DIE("Invalid set of system call ranges");	686 SANDBOX_DIE("Invalid set of system call ranges");

687 } else if (stop - start == 1) {	687 } else if (stop - start == 1) {

688 // If we have narrowed things down to a single range object, we can	688 // If we have narrowed things down to a single range object, we can

689 // return from the BPF filter program.	689 // return from the BPF filter program.

690 return gen->MakeInstruction(BPF_RET+BPF_K, start->err);	690 return RetExpression(gen, start->err);

691 }	691 }

692	692

693 // Pick the range object that is located at the mid point of our list.	693 // Pick the range object that is located at the mid point of our list.

694 // We compare our system call number against the lowest valid system call	694 // We compare our system call number against the lowest valid system call

695 // number in this range object. If our number is lower, it is outside of	695 // number in this range object. If our number is lower, it is outside of

696 // this range object. If it is greater or equal, it might be inside.	696 // this range object. If it is greater or equal, it might be inside.

697 Ranges::const_iterator mid = start + (stop - start)/2;	697 Ranges::const_iterator mid = start + (stop - start)/2;

698	698

699 // Sub-divide the list of ranges and continue recursively.	699 // Sub-divide the list of ranges and continue recursively.

700 Instruction *jf = assembleJumpTable(gen, start, mid);	700 Instruction *jf = AssembleJumpTable(gen, start, mid);

701 Instruction *jt = assembleJumpTable(gen, mid, stop);	701 Instruction *jt = AssembleJumpTable(gen, mid, stop);

702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);	702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);

703 }	703 }

704	704

705 void Sandbox::sigSys(int nr, siginfo_t info, void void_context) {	705 Instruction Sandbox::RetExpression(CodeGen gen, const ErrorCode& cond) {

	706 if (cond.error_type_ == ErrorCode::ET_COND) {

	707 return CondExpression(gen, cond);

	708 } else {

	709 return gen->MakeInstruction(BPF_RET+BPF_K, cond);

	710 }

	711 }

	712

	713 Instruction Sandbox::CondExpression(CodeGen gen, const ErrorCode& cond) {

	714 // We can only inspect the six system call arguments that are passed in

	715 // CPU registers.
	jln (very slow on Chromium) 2012/12/06 00:35:00 It's even worse than that and is architecture dependent. On IA32, the sixth argument is on the stack. Markus (顧孟勤) 2012/12/12 20:54:35 I am almost certain this is a red herring. Yes, for SYSENTER, the argument is placed on the stack. But the kernel then quickly retrieves it from there and the rest of the code path is identical to what we are doing for "int $0x80". This all happens before BPF filters come into play. So, no risk of any race conditions. On 2012/12/06 00:35:00, Julien Tinnes wrote: > It's even worse than that and is architecture dependent. > > On IA32, the sixth argument is on the stack.
	716 if (cond.argno_ < 0 \|\| cond.argno_ >= 6) {

	717 SANDBOX_DIE("Internal compiler error; invalid argument number "

	718 "encountered");

	719 }

	720

	721 // BPF programs operate on 32bit entities. Load both halves of the 64bit

	722 // system call argument and then generate suitable conditional statements.

	723 Instruction *msb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	724 offsetof(struct arch_seccomp_data, args) +

	725 cond.argno_ * sizeof(uint64_t) +

	726 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4)); // Most significant bits

	727 Instruction *lsb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	728 offsetof(struct arch_seccomp_data, args) +

	729 cond.argno_ * sizeof(uint64_t) +

	730 (__BYTE_ORDER == __BIG_ENDIAN ? 4 : 0)); // Least significant bits

	731

	732 // Emit a suitable comparison statement.

	733 switch (cond.op_) {

	734 case ErrorCode::OP_EQUAL:

	735 // Compare the least significant bits for equality

	736 gen->JoinInstructions(lsb,

	737 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,

	738 static_cast<uint32_t>(cond.value_),

	739 RetExpression(gen, *cond.passed_),

	740 RetExpression(gen, *cond.failed_)));

	741

	742 // If we are looking at a 64bit argument, we need to also compare the

	743 // most significant bits.

	744 if (cond.width_ == ErrorCode::TP_64BIT) {

	745 lsb = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,

	746 static_cast<uint32_t>(cond.value_ >> 32),

	747 lsb,

	748 RetExpression(gen, *cond.failed_));

	749 }

	750 break;

	751 default:

	752 // TODO(markus): We can only check for equality so far.

	753 SANDBOX_DIE("Not implemented");

	754 break;

	755 }

	756

	757 // Ensure that we never pass a 64bit value, when we only expect a 32bit

	758 // value.

	759 if (cond.width_ == ErrorCode::TP_32BIT) {

	760 gen->JoinInstructions(msb,

	761 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0, lsb,

	762 RetExpression(gen, Kill("Unexpected 64bit argument detected"))));

	763 } else {

	764 gen->JoinInstructions(msb, lsb);

	765 }

	766

	767 return msb;

	768 }

	769

	770 void Sandbox::SigSys(int nr, siginfo_t info, void void_context) {

706 // Various sanity checks to make sure we actually received a signal	771 // Various sanity checks to make sure we actually received a signal

707 // triggered by a BPF filter. If something else triggered SIGSYS	772 // triggered by a BPF filter. If something else triggered SIGSYS

708 // (e.g. kill()), there is really nothing we can do with this signal.	773 // (e.g. kill()), there is really nothing we can do with this signal.

709 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context \|\|	774 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context \|\|

710 info->si_errno <= 0 \|\|	775 info->si_errno <= 0 \|\|

711 static_cast<size_t>(info->si_errno) > trapArraySize_) {	776 static_cast<size_t>(info->si_errno) > trap_array_size_) {

712 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal	777 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal

713 // safe and can lead to bugs. We should eventually implement a different	778 // safe and can lead to bugs. We should eventually implement a different

714 // logging and reporting mechanism that is safe to be called from	779 // logging and reporting mechanism that is safe to be called from

715 // the sigSys() handler.	780 // the sigSys() handler.

716 // TODO: If we feel confident that our code otherwise works correctly, we	781 // TODO: If we feel confident that our code otherwise works correctly, we

717 // could actually make an argument that spurious SIGSYS should	782 // could actually make an argument that spurious SIGSYS should

718 // just get silently ignored. TBD	783 // just get silently ignored. TBD

719 sigsys_err:	784 sigsys_err:

720 SANDBOX_DIE("Unexpected SIGSYS received");	785 SANDBOX_DIE("Unexpected SIGSYS received");

721 }	786 }

(...skipping 23 matching lines...) Expand all Loading...
745 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {	810 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {

746 errno = old_errno;	811 errno = old_errno;

747 if (sigsys.nr == __NR_clone) {	812 if (sigsys.nr == __NR_clone) {

748 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");	813 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");

749 }	814 }

750 rc = SandboxSyscall(sigsys.nr,	815 rc = SandboxSyscall(sigsys.nr,

751 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),	816 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),

752 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),	817 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),

753 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));	818 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));

754 } else {	819 } else {

755 const ErrorCode& err = trapArray_[info->si_errno - 1];	820 const ErrorCode& err = trap_array_[info->si_errno - 1];

756 if (!err.safe_) {	821 if (!err.safe_) {

757 SetIsInSigHandler();	822 SetIsInSigHandler();

758 }	823 }

759	824

760 // Copy the seccomp-specific data into an arch_seccomp_data structure. This	825 // Copy the seccomp-specific data into an arch_seccomp_data structure. This

761 // is what we are showing to TrapFnc callbacks that the system call	826 // is what we are showing to TrapFnc callbacks that the system call

762 // evaluator registered with the sandbox.	827 // evaluator registered with the sandbox.

763 struct arch_seccomp_data data = {	828 struct arch_seccomp_data data = {

764 sigsys.nr,	829 sigsys.nr,

765 SECCOMP_ARCH,	830 SECCOMP_ARCH,

(...skipping 30 matching lines...) Expand all Loading...
796 } else {	861 } else {

797 return safe < o.safe;	862 return safe < o.safe;

798 }	863 }

799 }	864 }

800	865

801 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,	866 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,

802 bool safe) {	867 bool safe) {

803 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance	868 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance

804 // of a SECCOMP_RET_TRAP.	869 // of a SECCOMP_RET_TRAP.

805 TrapKey key(fnc, aux, safe);	870 TrapKey key(fnc, aux, safe);

806 TrapIds::const_iterator iter = trapIds_.find(key);	871 TrapIds::const_iterator iter = trap_ids_.find(key);

807 uint16_t id;	872 uint16_t id;

808 if (iter != trapIds_.end()) {	873 if (iter != trap_ids_.end()) {

809 // We have seen this pair before. Return the same id that we assigned	874 // We have seen this pair before. Return the same id that we assigned

810 // earlier.	875 // earlier.

811 id = iter->second;	876 id = iter->second;

812 } else {	877 } else {

813 // This is a new pair. Remember it and assign a new id.	878 // This is a new pair. Remember it and assign a new id.

814 // Please note that we have to store traps in memory that doesn't get	879 // Please note that we have to store traps in memory that doesn't get

815 // deallocated when the program is shutting down. A memory leak is	880 // deallocated when the program is shutting down. A memory leak is

816 // intentional, because we might otherwise not be able to execute	881 // intentional, because we might otherwise not be able to execute

817 // system calls part way through the program shutting down	882 // system calls part way through the program shutting down

818 if (!traps_) {	883 if (!traps_) {

819 traps_ = new Traps();	884 traps_ = new Traps();

820 }	885 }

821 if (traps_->size() >= SECCOMP_RET_DATA) {	886 if (traps_->size() >= SECCOMP_RET_DATA) {

822 // In practice, this is pretty much impossible to trigger, as there	887 // In practice, this is pretty much impossible to trigger, as there

823 // are other kernel limitations that restrict overall BPF program sizes.	888 // are other kernel limitations that restrict overall BPF program sizes.

824 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");	889 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");

825 }	890 }

826 id = traps_->size() + 1;	891 id = traps_->size() + 1;

827	892

828 traps_->push_back(ErrorCode(fnc, aux, safe, id));	893 traps_->push_back(ErrorCode(fnc, aux, safe, id));

829 trapIds_[key] = id;	894 trap_ids_[key] = id;

830	895

831 // We want to access the traps_ vector from our signal handler. But	896 // We want to access the traps_ vector from our signal handler. But

832 // we are not assured that doing so is async-signal safe. On the other	897 // we are not assured that doing so is async-signal safe. On the other

833 // hand, C++ guarantees that the contents of a vector is stored in a	898 // hand, C++ guarantees that the contents of a vector is stored in a

834 // contiguous C-style array.	899 // contiguous C-style array.

835 // So, we look up the address and size of this array outside of the	900 // So, we look up the address and size of this array outside of the

836 // signal handler, where we can safely do so.	901 // signal handler, where we can safely do so.

837 trapArray_ = &(*traps_)[0];	902 trap_array_ = &(*traps_)[0];

838 trapArraySize_ = id;	903 trap_array_size_ = id;

839 return traps_->back();	904 return traps_->back();

840 }	905 }

841	906

842 return ErrorCode(fnc, aux, safe, id);	907 return ErrorCode(fnc, aux, safe, id);

843 }	908 }

844	909

845 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {	910 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {

846 return MakeTrap(fnc, aux, true /* Safe Trap */);	911 return MakeTrap(fnc, aux, true /* Safe Trap */);

847 }	912 }

848	913

(...skipping 13 matching lines...) Expand all Loading...
862	927

863 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {	928 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {

864 // TrapFnc functions report error by following the native kernel convention	929 // TrapFnc functions report error by following the native kernel convention

865 // of returning an exit code in the range of -1..-4096. They do not try to	930 // of returning an exit code in the range of -1..-4096. They do not try to

866 // set errno themselves. The glibc wrapper that triggered the SIGSYS will	931 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

867 // ultimately do so for us.	932 // ultimately do so for us.

868 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;	933 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

869 return -err;	934 return -err;

870 }	935 }

871	936

872 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {	937 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,

	938 ErrorCode::Operation op, uint64_t value,

	939 const ErrorCode& passed, const ErrorCode& failed) {

	940 return ErrorCode(argno, width, op, value,

	941 &*conds_.insert(passed).first,

	942 &*conds_.insert(failed).first);

	943 }

	944

	945 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {

873 SANDBOX_DIE(static_cast<char *>(aux));	946 SANDBOX_DIE(static_cast<char *>(aux));

874 }	947 }

875	948

876 ErrorCode Sandbox::Kill(const char *msg) {	949 ErrorCode Sandbox::Kill(const char *msg) {

877 return Trap(bpfFailure, const_cast<char *>(msg));	950 return Trap(BpfFailure, const_cast<char *>(msg));

878 }	951 }

879	952

880 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	953 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

881 int Sandbox::proc_fd_ = -1;	954 int Sandbox::proc_fd_ = -1;

882 Sandbox::Evaluators Sandbox::evaluators_;	955 Sandbox::Evaluators Sandbox::evaluators_;

883 Sandbox::Traps *Sandbox::traps_ = NULL;	956 Sandbox::Traps *Sandbox::traps_ = NULL;

884 Sandbox::TrapIds Sandbox::trapIds_;	957 Sandbox::TrapIds Sandbox::trap_ids_;

885 ErrorCode *Sandbox::trapArray_ = NULL;	958 ErrorCode *Sandbox::trap_array_ = NULL;

886 size_t Sandbox::trapArraySize_ = 0;	959 size_t Sandbox::trap_array_size_ = 0;

887 bool Sandbox::has_unsafe_traps_ = false;	960 bool Sandbox::has_unsafe_traps_ = false;

	961 Sandbox::Conds Sandbox::conds_;

888	962

889 } // namespace	963 } // namespace

OLD	NEW