OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #define _GNU_SOURCE |
| 6 #include "init_process.h" |
| 7 |
| 8 #include <dirent.h> |
| 9 #include <fcntl.h> |
| 10 #include <signal.h> |
| 11 #include <stdbool.h> |
| 12 #include <stdio.h> |
| 13 #include <stdlib.h> |
| 14 #include <string.h> |
| 15 #include <sys/types.h> |
| 16 #include <sys/wait.h> |
| 17 #include <unistd.h> |
| 18 |
| 19 |
// Looks up a numeric field in "${process}/status", resolved relative to the
// directory handle |proc_fd| (the callers in this file pass a handle to
// "/proc").
//
//   proc_fd  directory file descriptor the path is resolved against
//   process  name of the sub-directory, e.g. "self" or a decimal process id
//   field    name of the field to look up, without the trailing colon,
//            e.g. "Pid" or "PPid"
//
// The file is searched for a line of the form "${field}:\t<number>". Because
// the match requires a preceding newline, a field on the very first line of
// the file is never found.
//
// Returns the parsed value, or -1 if the file cannot be opened or read, if
// memory cannot be allocated, if the field is missing, or if the value is not
// a number that fits into an "int".
static int getProcessStatus(int proc_fd, const char *process,
                            const char *field) {
  // Only bother to read the first 4kB. All of the fields that we are
  // interested in will show up much earlier.
  enum { kMaxStatusBytes = 4096 };
  static const char kSuffix[] = "/status";

  int ret = -1;
  int fd = -1;
  char *path = NULL;
  char *needle = NULL;
  const char *hit = NULL;
  size_t path_size = 0;
  size_t needle_size = 0;
  ssize_t sz = 0;
  char data[kMaxStatusBytes + 1];

  // Build "${process}/status". sizeof(kSuffix) already accounts for the
  // terminating NUL byte.
  path_size = strlen(process) + sizeof kSuffix;
  path = malloc(path_size);
  if (path == NULL) {
    goto out;
  }
  snprintf(path, path_size, "%s%s", process, kSuffix);

  fd = openat(proc_fd, path, O_RDONLY);
  if (fd < 0) {
    goto out;
  }

  // read() returns a signed count. A failure (-1) must be detected before
  // the result is used as an index into the buffer.
  sz = read(fd, data, kMaxStatusBytes);
  if (sz <= 0) {
    goto out;
  }
  data[sz] = '\0';

  // Build the search pattern "\n${field}:\t": one byte for the newline, two
  // for ":\t" and one for the terminating NUL.
  needle_size = strlen(field) + 4;
  needle = malloc(needle_size);
  if (needle == NULL) {
    goto out;
  }
  snprintf(needle, needle_size, "\n%s:\t", field);

  hit = strstr(data, needle);
  if (hit != NULL) {
    // Extract the numerical value of the "field"
    const char *digits = hit + strlen(needle);
    char *end = NULL;
    long value = strtol(digits, &end, 10);
    if (end != digits && value == (long)(int)value) {
      ret = (int)value;
    }
  }

out:
  if (fd >= 0) {
    close(fd);
  }
  free(needle);
  free(path);
  return ret;
}
| 50 |
// Scans the directory behind |proc_fd| (the caller passes a handle to
// "/proc") and reports whether any listed process names |pid| as its parent.
//
//   proc_fd  directory file descriptor to scan; it is left open
//   pid      process id to compare against each entry's "PPid" field
//
// Returns true as soon as one matching entry is found. Returns false if no
// entry matches or if the directory cannot be opened for scanning.
static bool hasChildren(int proc_fd, int pid) {
  // Iterate over a duplicate, so that closedir() does not close the caller's
  // descriptor.
  int fd = dup(proc_fd);
  if (fd < 0) {
    return false;
  }

  // Start the scan at the first directory entry, regardless of where a
  // previous scan stopped.
  lseek(fd, 0, SEEK_SET);

  DIR *dir = fdopendir(fd);
  if (dir == NULL) {
    // fdopendir() did not take ownership of the descriptor.
    close(fd);
    return false;
  }

  bool ret = false;
  struct dirent *entry;
  while ((entry = readdir(dir)) != NULL) {
    // Find numerical entries. Those are processes. Only names starting with
    // '1' through '9' are considered.
    if (entry->d_name[0] <= '0' || entry->d_name[0] > '9') {
      continue;
    }

    // For each process, check the parent's pid
    if (getProcessStatus(proc_fd, entry->d_name, "PPid") == pid) {
      // We found a child process. We can stop searching, now
      ret = true;
      break;
    }
  }
  closedir(dir);
  return ret;
}
| 77 |
// Main loop of the sandbox's "init" process. This function never returns; it
// terminates the process with _exit().
//
//   init_fd    descriptor that receives a single byte once initialization is
//              complete; it is closed afterwards
//   child_pid  process id of the child whose exit status is propagated as
//              this process's own exit status
//   proc_fd    directory handle to "/proc"; kept open for the lifetime of
//              the process
//   null_fd    descriptor that stdin, stdout and stderr get redirected to;
//              it is closed afterwards
//
// After setup, the function reaps terminated child processes until no
// process in "/proc" lists this process as its parent. It then exits with
// the exit code of |child_pid|, or with 0 if that child was never reaped. If
// |child_pid| was killed by a signal, it first tries to raise that signal
// and falls back to exit code 1.
void SystemInitProcess(int init_fd, int child_pid, int proc_fd, int null_fd) {
  int ret = 0;

  // CLONE_NEWPID doesn't adjust the contents of the "/proc" file system.
  // This is very confusing. And it is even possible the kernel developers
  // will consider this a bug and fix it at some point in the future.
  // So, to be on the safe side, we explicitly retrieve our process id
  // from the "/proc" file system. This should continue to work, even if
  // the kernel eventually gets fixed so that "/proc" shows the view from
  // inside of the new pid namespace.
  pid_t init_pid = getProcessStatus(proc_fd, "self", "Pid");
  if (init_pid <= 0) {
    fprintf(stderr,
            "Failed to determine real process id of new \"init\" process\n");
    _exit(1);
  }

  // Redirect stdio to /dev/null
  if (null_fd < 0 ||
      dup2(null_fd, 0) != 0 ||
      dup2(null_fd, 1) != 1 ||
      dup2(null_fd, 2) != 2) {
    fprintf(stderr, "Failed to point stdio to a safe place\n");
    _exit(1);
  }
  close(null_fd);

  // Close all file handles other than stdio, |init_fd| and |proc_fd|
  int fds_fd = openat(proc_fd, "self/fd", O_RDONLY | O_DIRECTORY);
  DIR *dir = fds_fd >= 0 ? fdopendir(fds_fd) : NULL;
  if (dir == NULL) {
    // If we don't know the list of our open file handles, just try closing
    // all valid ones. This also takes care of |fds_fd|, if it was opened.
    for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) {
      if (fd != init_fd && fd != proc_fd) {
        close(fd);
      }
    }
  } else {
    // If available, it is much more efficient to just close the file
    // handles that show up in "/proc/self/fd/"
    struct dirent *res;
    while ((res = readdir(dir)) != NULL) {
      // Skip "." and "..", which sort below the digits.
      if (res->d_name[0] < '0') {
        continue;
      }
      int fd = atoi(res->d_name);
      // Do not close the descriptor that backs the directory stream we are
      // currently iterating over; closedir() takes care of it.
      if (fd > 2 && fd != init_fd && fd != proc_fd && fd != dirfd(dir)) {
        close(fd);
      }
    }
    closedir(dir);
  }

  // Block SIGCHLD. No signal handler is installed; pending signals are
  // consumed synchronously with sigwaitinfo() below.
  sigset_t mask;
  sigemptyset(&mask);
  sigaddset(&mask, SIGCHLD);
  sigprocmask(SIG_BLOCK, &mask, NULL);

  // Notify other processes that we are done initializing. This is best
  // effort; the descriptor gets closed either way.
  write(init_fd, " ", 1);
  close(init_fd);

  // Handle dying processes that have been re-parented to the "init" process
  for (;;) {
    // Wait until a SIGCHLD signal is pending.
    sigwaitinfo(&mask, NULL);

    bool retry = false;
    do {
      for (;;) {
        // Reap all exit codes of our child processes. This includes both
        // processes that originally were our immediate children, and processes
        // that have since been re-parented to be our children.
        // A pid argument of -1 matches every child. An argument of 0 would
        // only match children in our own process group and would miss
        // re-parented processes that have changed their process group.
        int status;
        pid_t pid = waitpid(-1, &status, __WALL | WNOHANG);
        if (pid <= 0) {
          break;
        }

        // We found some newly deceased child processes. Better schedule
        // another very thorough inspection of our state, by discarding any
        // earlier conclusion that we are out of children.
        retry = false;

        if (pid == child_pid) {
          // If our first immediate child died, remember its exit code. That's
          // the exit code that we should be reporting to our parent process.
          // Termination by a signal is remembered as a negative value.
          if (WIFEXITED(status)) {
            ret = WEXITSTATUS(status);
          } else if (WIFSIGNALED(status)) {
            ret = -WTERMSIG(status);
          }
        }
      }
      if (hasChildren(proc_fd, init_pid)) {
        // As long as we still have child processes, continue waiting for
        // their ultimate demise.
        retry = false;
      } else {
        if (retry) {
          // No more child processes. We can exit now.
          if (ret < 0) {
            // Try to exit with the same signal that our child terminated with.
            // NOTE(review): presumably pid 1 is this process as seen from
            // inside the new pid namespace; confirm.
            signal(-ret, SIG_DFL);
            kill(1, -ret);
            ret = 1;
          }
          // Exit with the same exit code that our child exited with
          _exit(ret);
        } else {
          // There is a little bit of a race condition between getting
          // notifications and scanning the "/proc" file system. This is
          // particularly true, because scanning "/proc" cannot possibly be
          // an atomic operation.
          // If we find that we no longer appear to have any children, we check
          // one more time whether there are any children we can now reap.
          // They might have died while we were scanning "/proc" and if so,
          // they should now show up.
          retry = true;
        }
      }
    } while (retry);
  }
}
OLD | NEW |