| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #define _GNU_SOURCE | |
| 6 #include "init_process.h" | |
| 7 | |
| 8 #include <dirent.h> | |
| 9 #include <fcntl.h> | |
| 10 #include <signal.h> | |
| 11 #include <stdbool.h> | |
| 12 #include <stdio.h> | |
| 13 #include <stdlib.h> | |
| 14 #include <string.h> | |
| 15 #include <sys/types.h> | |
| 16 #include <sys/wait.h> | |
| 17 #include <unistd.h> | |
| 18 | |
| 19 | |
// Reads "${process}/status" relative to the directory "proc_fd" and returns
// the integer value of the line that starts with "${field}:\t".
//
//   proc_fd  descriptor of an open directory (callers pass "/proc")
//   process  name of the sub-directory to inspect (e.g. "self" or a pid)
//   field    name of the status field, without the trailing colon
//
// Returns the parsed value, or -1 if the file cannot be opened or read, if
// memory cannot be allocated, or if the field does not show up in the first
// 4kB of the file. The field must be preceded by a newline, so the very first
// line of the file can never match.
static int getProcessStatus(int proc_fd, const char *process,
                            const char *field) {
  int ret = -1;

  // Build the relative path "${process}/status"
  size_t path_size = strlen(process) + sizeof("/status");
  char *path = malloc(path_size);
  if (path == NULL) {
    return -1;
  }
  snprintf(path, path_size, "%s/status", process);
  int fd = openat(proc_fd, path, O_RDONLY);
  free(path);
  if (fd < 0) {
    return -1;
  }

  // Only bother to read the first 4kB. All of the fields that we
  // are interested in will show up much earlier.
  char buf[4097];
  // read() returns a signed count; keeping it signed lets us tell an error
  // (-1) apart from a successful read.
  ssize_t sz = read(fd, buf, sizeof(buf) - 1);
  close(fd);
  if (sz <= 0) {
    return -1;
  }
  buf[sz] = '\000';

  // Find a matching "field": "\n" + field + ":" + "\t"
  size_t needle_len = strlen(field) + 3;
  char *needle = malloc(needle_len + 1);
  if (needle == NULL) {
    return -1;
  }
  snprintf(needle, needle_len + 1, "\n%s:\t", field);
  const char *ptr = strstr(buf, needle);
  if (ptr != NULL) {
    // Extract the numerical value of the "field"
    ret = (int)strtol(ptr + needle_len, NULL, 10);
  }
  free(needle);
  return ret;
}
| 50 | |
// Scans the directory referred to by "proc_fd" (callers pass "/proc") and
// reports whether any process listed there names "pid" as its parent.
//
// Returns true as soon as one entry's "PPid" status field equals "pid".
// Returns false if no such entry is found, or if the directory cannot be
// duplicated and opened for reading.
static bool hasChildren(int proc_fd, int pid) {
  // closedir() closes the descriptor handed to fdopendir(), so iterate over a
  // duplicate and leave "proc_fd" open for the caller.
  int fd = dup(proc_fd);
  if (fd < 0) {
    return false;
  }

  // The duplicate shares its file offset with "proc_fd"; rewind so that a
  // previous scan does not cause us to start in the middle of the directory.
  lseek(fd, 0, SEEK_SET);

  DIR *dir = fdopendir(fd);
  if (dir == NULL) {
    close(fd);
    return false;
  }

  bool ret = false;
  struct dirent *de;
  while ((de = readdir(dir)) != NULL) {
    // Find numerical entries. Those are processes. Names starting with '0'
    // are skipped as well.
    if (de->d_name[0] <= '0' || de->d_name[0] > '9') {
      continue;
    }

    // For each process, check the parent's pid
    if (getProcessStatus(proc_fd, de->d_name, "PPid") == pid) {
      // We found a child process. We can stop searching, now
      ret = true;
      break;
    }
  }
  closedir(dir);
  return ret;
}
| 77 | |
// Runs the "init" process of a new pid namespace. This function never
// returns; it terminates the process with _exit().
//
//   init_fd    descriptor that receives a single byte once initialization is
//              complete; it is closed afterwards
//   child_pid  pid of the first immediate child, whose exit status becomes
//              our own exit status
//   proc_fd    descriptor of an open "/proc" directory; kept open for the
//              lifetime of the process
//   null_fd    descriptor that stdin, stdout and stderr get redirected to
//              (callers are expected to pass "/dev/null")
//
// After redirecting stdio and closing all other descriptors, the function
// reaps child processes until none are left, then exits with the status of
// "child_pid" (or attempts to die from the same signal).
void SystemInitProcess(int init_fd, int child_pid, int proc_fd, int null_fd) {
  int ret = 0;

  // CLONE_NEWPID doesn't adjust the contents of the "/proc" file system.
  // This is very confusing. And it is even possible the kernel developers
  // will consider this a bug and fix it at some point in the future.
  // So, to be on the safe side, we explicitly retrieve our process id
  // from the "/proc" file system. This should continue to work, even if
  // the kernel eventually gets fixed so that "/proc" shows the view from
  // inside of the new pid namespace.
  pid_t init_pid = getProcessStatus(proc_fd, "self", "Pid");
  if (init_pid <= 0) {
    fprintf(stderr,
            "Failed to determine real process id of new \"init\" process\n");
    _exit(1);
  }

  // Redirect stdio to /dev/null
  if (null_fd < 0 ||
      dup2(null_fd, 0) != 0 ||
      dup2(null_fd, 1) != 1 ||
      dup2(null_fd, 2) != 2) {
    fprintf(stderr, "Failed to point stdio to a safe place\n");
    _exit(1);
  }
  // If "null_fd" already is one of the stdio descriptors, closing it would
  // undo the redirection that we just set up.
  if (null_fd > 2) {
    close(null_fd);
  }

  // Close all file handles
  int fds_fd = openat(proc_fd, "self/fd", O_RDONLY | O_DIRECTORY);
  DIR *dir = fds_fd >= 0 ? fdopendir(fds_fd) : NULL;
  if (dir == NULL) {
    // If we don't know the list of our open file handles, just try closing
    // all valid ones.
    for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) {
      if (fd != init_fd && fd != proc_fd) {
        close(fd);
      }
    }
  } else {
    // If available, it is much more efficient to just close the file
    // handles that show up in "/proc/self/fd/"
    int dir_fd = dirfd(dir);
    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
      // Skips "." and ".."
      if (de->d_name[0] < '0') {
        continue;
      }
      int fd = atoi(de->d_name);
      // Keep stdio, the two descriptors we still need, and the descriptor
      // that backs the directory stream we are iterating over.
      if (fd > 2 && fd != init_fd && fd != proc_fd && fd != dir_fd) {
        close(fd);
      }
    }
    closedir(dir);
  }

  // Block SIGCHLD. No handler is installed; the blocked signal stays pending
  // and is collected synchronously by sigwaitinfo() below.
  sigset_t mask;
  sigemptyset(&mask);
  sigaddset(&mask, SIGCHLD);
  sigprocmask(SIG_BLOCK, &mask, NULL);

  // Notify other processes that we are done initializing. The result of
  // write() is deliberately ignored; the empty "if" merely consumes it.
  if (write(init_fd, " ", 1)) { }
  close(init_fd);

  // Handle dying processes that have been re-parented to the "init" process
  for (;;) {
    // "retry" is set once a scan of "/proc" found no children. It requests
    // one more reaping pass before we actually exit.
    bool retry = false;
    do {
      for (;;) {
        // Reap all exit codes of our child processes. This includes both
        // processes that originally were our immediate children, and processes
        // that have since been re-parented to be our children. Waiting on -1
        // covers children in any process group.
        int status;
        pid_t pid = waitpid(-1, &status, __WALL | WNOHANG);
        if (pid <= 0) {
          break;
        }

        // We found some newly deceased child processes. Better schedule
        // another very thorough inspection of our state.
        retry = false;

        if (pid == child_pid) {
          // If our first immediate child died, remember its exit code. That's
          // the exit code that we should be reporting to our parent process
          if (WIFEXITED(status)) {
            ret = WEXITSTATUS(status);
          } else if (WIFSIGNALED(status)) {
            // A negative value records the terminating signal
            ret = -WTERMSIG(status);
          }
        }
      }

      if (hasChildren(proc_fd, init_pid)) {
        // As long as we still have child processes, continue waiting for
        // their ultimate demise.
        retry = false;
      } else if (retry) {
        // No more child processes. We can exit now.
        if (ret < 0) {
          // Try to exit with the same signal that our child terminated with
          signal(-ret, SIG_DFL);
          kill(1, -ret);
          ret = 1;
        }
        // Exit with the same exit code that our child exited with
        _exit(ret);
      } else {
        // There is a little bit of a race condition between getting
        // notifications and scanning the "/proc" file system. This is
        // particularly true, because scanning "/proc" cannot possibly be
        // an atomic operation.
        // If we find that we no longer appear to have any children, we check
        // one more time whether there are any children we can now reap.
        // They might have died while we were scanning "/proc" and if so,
        // they should now show up.
        retry = true;
      }
    } while (retry);

    // Wait until we receive a SIGCHLD signal.
    sigwaitinfo(&mask, NULL);
  }
}
| OLD | NEW |