OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #define _GNU_SOURCE | |
6 #include "init_process.h" | |
7 | |
8 #include <dirent.h> | |
9 #include <fcntl.h> | |
10 #include <signal.h> | |
11 #include <stdbool.h> | |
12 #include <stdio.h> | |
13 #include <stdlib.h> | |
14 #include <string.h> | |
15 #include <sys/types.h> | |
16 #include <sys/wait.h> | |
17 #include <unistd.h> | |
18 | |
19 | |
// Looks up a numeric field in "${process}/status", resolved relative to
// |proc_fd| with openat().
//
//   proc_fd: directory file descriptor the path is resolved against
//            (callers in this file pass their handle for "/proc").
//   process: name of the per-process directory, e.g. "self" or a decimal pid.
//   field:   field name without the trailing colon, e.g. "Pid" or "PPid".
//
// Returns the integer that follows "\n${field}:\t", or -1 if the file cannot
// be opened or read, memory cannot be allocated, or the field is not found.
// Only the first 4kB of the file are searched, and because the search pattern
// starts with a newline, a field on the very first line is never matched.
static int getProcessStatus(int proc_fd, const char *process,
                            const char *field) {
  enum { kMaxStatusBytes = 4096 };
  static const char kStatusSuffix[] = "/status";
  int ret = -1;

  // Build and open "${process}/status". sizeof() already accounts for the
  // terminating NUL.
  size_t path_size = strlen(process) + sizeof(kStatusSuffix);
  char *path = malloc(path_size);
  if (path == NULL) {
    return ret;
  }
  snprintf(path, path_size, "%s%s", process, kStatusSuffix);
  int fd = openat(proc_fd, path, O_RDONLY);
  free(path);
  if (fd < 0) {
    return ret;
  }

  // Search pattern is '\n' + field + ':' + '\t' + NUL.
  size_t needle_size = strlen(field) + 4;
  char *needle = malloc(needle_size);
  char *buf = malloc(kMaxStatusBytes + 1);
  if (needle != NULL && buf != NULL) {
    // Only bother to read the first 4kB. All of the fields that we are
    // interested in will show up much earlier.
    // read() returns a signed count; keeping it signed makes sure that an
    // error (-1) is not mistaken for a very large positive length.
    ssize_t sz = read(fd, buf, kMaxStatusBytes);
    if (sz > 0) {
      buf[sz] = '\0';
      snprintf(needle, needle_size, "\n%s:\t", field);
      const char *match = strstr(buf, needle);
      if (match != NULL) {
        // Extract the numerical value that follows the field name.
        ret = atoi(match + (needle_size - 1));
      }
    }
  }

  free(buf);
  free(needle);
  close(fd);
  return ret;
}
50 | |
// Scans the directory behind |proc_fd| for process entries whose "PPid" field
// equals |pid|.
//
//   proc_fd: directory file descriptor to enumerate (callers in this file
//            pass their handle for "/proc"). It is left open, but its file
//            offset is reset because the duplicate made here shares it.
//   pid:     process id to look for as a parent.
//
// Returns true as soon as one such entry is found. Returns false if there is
// none, or if the directory cannot be duplicated or opened for reading.
static bool hasChildren(int proc_fd, int pid) {
  // Iterate over a duplicate, so that closedir() below does not close the
  // caller's descriptor.
  int fd = dup(proc_fd);
  if (fd < 0) {
    return false;
  }

  // Start enumerating from the first directory entry again.
  lseek(fd, 0, SEEK_SET);

  DIR *dir = fdopendir(fd);
  if (dir == NULL) {
    // fdopendir() only takes ownership of the descriptor on success.
    close(fd);
    return false;
  }

  bool ret = false;
  struct dirent *entry;
  while ((entry = readdir(dir)) != NULL) {
    // Only entries that start with a digit 1-9 are treated as processes.
    char first = entry->d_name[0];
    if (first <= '0' || first > '9') {
      continue;
    }

    // For each process, compare its parent's pid with the one we look for.
    if (getProcessStatus(proc_fd, entry->d_name, "PPid") == pid) {
      // We found a child process. We can stop searching, now.
      ret = true;
      break;
    }
  }
  closedir(dir);
  return ret;
}
77 | |
// Body of the "init" process of a new pid namespace. It detaches from all
// inherited resources, signals readiness, and then reaps child processes
// until none are left. This function never returns; it always ends in
// _exit().
//
//   init_fd:   descriptor that receives a single byte once initialization is
//              done; it is closed right afterwards.
//   child_pid: pid of the first immediate child. Its exit status determines
//              the exit status of this process.
//   proc_fd:   directory descriptor used for all "/proc" lookups; stays open.
//   null_fd:   descriptor that stdin, stdout and stderr get redirected to
//              (the original comment calls it /dev/null).
//
// Exit status: 1 on setup failure. Otherwise the exit code of |child_pid| if
// it exited normally (0 if it was never reaped). If it was killed by a
// signal, the same signal is sent to pid 1 first, and 1 is used if that does
// not terminate this process.
void SystemInitProcess(int init_fd, int child_pid, int proc_fd, int null_fd) {
  int ret = 0;

  // CLONE_NEWPID doesn't adjust the contents of the "/proc" file system.
  // This is very confusing. And it is even possible the kernel developers
  // will consider this a bug and fix it at some point in the future.
  // So, to be on the safe side, we explicitly retrieve our process id
  // from the "/proc" file system. This should continue to work, even if
  // the kernel eventually gets fixed so that "/proc" shows the view from
  // inside of the new pid namespace.
  pid_t init_pid = getProcessStatus(proc_fd, "self", "Pid");
  if (init_pid <= 0) {
    fprintf(stderr,
            "Failed to determine real process id of new \"init\" process\n");
    _exit(1);
  }

  // Redirect stdio to |null_fd|.
  if (null_fd < 0 ||
      dup2(null_fd, 0) != 0 ||
      dup2(null_fd, 1) != 1 ||
      dup2(null_fd, 2) != 2) {
    fprintf(stderr, "Failed to point stdio to a safe place\n");
    _exit(1);
  }
  if (null_fd > 2) {
    // Don't close one of the stdio descriptors that we just set up, in case
    // the caller handed us one of them.
    close(null_fd);
  }

  // Close all file handles other than stdio, |init_fd| and |proc_fd|.
  int fds_fd = openat(proc_fd, "self/fd", O_RDONLY | O_DIRECTORY);
  DIR *dir = fds_fd >= 0 ? fdopendir(fds_fd) : NULL;
  if (dir == NULL) {
    if (fds_fd >= 0) {
      // fdopendir() failed and did not take ownership of the descriptor.
      close(fds_fd);
    }
    // If we don't know the list of our open file handles, just try closing
    // all valid ones.
    for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) {
      if (fd != init_fd && fd != proc_fd) {
        close(fd);
      }
    }
  } else {
    // If available, it is much more efficient to just close the file
    // handles that show up in "/proc/self/fd/"
    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
      // Skips "." and "..", which sort below '0'.
      if (entry->d_name[0] < '0') {
        continue;
      }
      int fd = atoi(entry->d_name);
      // The directory stream's own descriptor must stay open until we are
      // done iterating; closedir() takes care of it.
      if (fd > 2 && fd != init_fd && fd != proc_fd && fd != dirfd(dir)) {
        close(fd);
      }
    }
    closedir(dir);
  }

  // Block SIGCHLD. No handler gets installed; the signal is instead consumed
  // synchronously by sigwaitinfo() at the bottom of the main loop.
  sigset_t mask;
  sigemptyset(&mask);
  sigaddset(&mask, SIGCHLD);
  sigprocmask(SIG_BLOCK, &mask, NULL);

  // Notify other processes that we are done initializing. This is best
  // effort; the result of write() is deliberately ignored.
  if (write(init_fd, " ", 1)) { }
  close(init_fd);

  // Handle dying processes that have been re-parented to the "init" process
  for (;;) {
    // |retry| is true only while we are re-checking after a "/proc" scan
    // that found no children. We exit when a second consecutive pass neither
    // reaps anything nor finds any children.
    bool retry = false;
    do {
      for (;;) {
        // Reap all exit codes of our child processes. This includes both
        // processes that originally were our immediate children, and processes
        // that have since been re-parented to be our children.
        // Waiting for pid -1 (rather than 0) also covers children that have
        // moved to a different process group; those would otherwise never be
        // reaped and would keep this loop alive forever.
        int status;
        pid_t pid = waitpid(-1, &status, __WALL | WNOHANG);
        if (pid <= 0) {
          break;
        }

        // We found a newly deceased child process. Discard any earlier
        // "no children" observation, so that it has to be confirmed again.
        retry = false;

        if (pid == child_pid) {
          // If our first immediate child died, remember its exit code. That's
          // the exit code that we should be reporting to our parent process.
          // Termination by signal is recorded as the negated signal number.
          if (WIFEXITED(status)) {
            ret = WEXITSTATUS(status);
          } else if (WIFSIGNALED(status)) {
            ret = -WTERMSIG(status);
          }
        }
      }

      if (hasChildren(proc_fd, init_pid)) {
        // As long as we still have child processes, continue waiting for
        // their ultimate demise.
        retry = false;
      } else if (retry) {
        // No more child processes. We can exit now.
        if (ret < 0) {
          // Try to exit with the same signal that our child terminated with
          signal(-ret, SIG_DFL);
          kill(1, -ret);
          ret = 1;
        }
        // Exit with the same exit code that our child exited with
        _exit(ret);
      } else {
        // There is a little bit of a race condition between getting
        // notifications and scanning the "/proc" file system. This is
        // particularly true, because scanning "/proc" cannot possibly be
        // an atomic operation.
        // If we find that we no longer appear to have any children, we check
        // one more time whether there are any children we can now reap.
        // They might have died while we were scanning "/proc" and if so,
        // they should now show up.
        retry = true;
      }
    } while (retry);

    // Sleep until the next SIGCHLD arrives. The signal itself carries no
    // information that we need; it merely wakes us up for another pass.
    sigwaitinfo(&mask, NULL);
  }
}
OLD | NEW |