Android 系统启动之 Init 进程启动分析一

本文基于 AOSP android-10.0.0_r41 版本讲解，内核版本 android-goldfish-4.14-gchips

在上一节，我们知道了系统的整个启动流程，对于 Framework 层，我们主要关心应用层的整个流程。

1.内核启动 init 进程

内核完成启动后，会通过 run_init_process 函数，启动用户空间的首个进程 init：

// 模拟器内核源码
// init/main.c
static int __ref kernel_init(void *unused)
{   
    // ......

    // ramdisk_execute_command = "/init"
    // 在根文件系统中找可执行文件 /init，如果有就执行它
	if (ramdisk_execute_command) {
		ret = run_init_process(ramdisk_execute_command);
		if (!ret)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}


    //......
    
    // 如果 /init 可执行文件没有，就找其他地方的 init 可执行文件，并执行它
    if (execute_command) {
		ret = run_init_process(execute_command);
		if (!ret)
			return 0;
		panic("Requested init %s failed (error %d).",
		      execute_command, ret);
	}
	if (!try_to_run_init_process("/sbin/init") ||
	    !try_to_run_init_process("/etc/init") ||
	    !try_to_run_init_process("/bin/init") ||
	    !try_to_run_init_process("/bin/sh"))
		return 0;

内核会依次在各个路径下寻找 init 可执行文件，并执行它。我手上刷了 AOSP android-10.0.0_r41 的 Pixel 4，其 init 程序预置在 /init。

2.init 进程

init 可执行文件对应源码在 system/core/init/main.cpp :

// Entry point of the init binary. The same executable serves several roles and
// dispatches on the name it was invoked under (argv[0]) and on its first
// argument (argv[1]).
int main(int argc, char** argv) {
#if __has_feature(address_sanitizer)
    __asan_set_error_report_callback(AsanReportCallback);
#endif

    // Invoked under the executable name "ueventd": run the ueventd entry point.
    if (!strcmp(basename(argv[0]), "ueventd")) {
        return ueventd_main(argc, argv);
    }

    if (argc > 1) {
        // First argument is "subcontext".
        if (!strcmp(argv[1], "subcontext")) {
            android::base::InitLogging(argv, &android::base::KernelLogger);
            const BuiltinFunctionMap function_map;

            return SubcontextMain(argc, argv, &function_map);
        }

        // First argument is "selinux_setup".
        if (!strcmp(argv[1], "selinux_setup")) {
            return SetupSelinux(argv);
        }

        // First argument is "second_stage".
        if (!strcmp(argv[1], "second_stage")) {
            return SecondStageMain(argc, argv);
        }
    }

    // No argument at all, or an argument that matched none of the cases above:
    // run the first stage.
    return FirstStageMain(argc, argv);
}

这里会检测 main 函数的参数，上一节中我们看到内核启动 init 进程时，没有带参数，所以这里会直接执行到 FirstStageMain。

3.init 进程第一阶段 FirstStageMain 函数分析

第一阶段 FirstStageMain 函数的工作主要是挂载一些虚拟文件系统，还有挂载两个系统最核心的分区 system, vendor。

这两步都是为了后续的工作做准备。比如设置 selinux 需要从 system, vendor 分区中读取 sepolicy。 SecondStageMain 需要从 system, vendor 分区中读取 property 信息，需要启动系统核心服务。

3.1 FirstStageMain 函数第一部分

FirstStageMain 函数很长，我们先看第一部分：

// system/core/init/first_stage_init.cpp

int FirstStageMain(int argc, char** argv) {
    // init crash 时重启引导加载程序
    // 这个函数的主要作用是为 SIGABRT、SIGBUS 等致命信号安装统一的处理函数（sa_flags 设置为 SA_RESTART），init 进程一旦收到这些信号，处理函数就会调用 InitFatalReboot() 重启系统
    if (REBOOT_BOOTLOADER_ON_PANIC) {
        InstallRebootSignalHandlers();
    }

    boot_clock::time_point start_time = boot_clock::now();

    std::vector<std::pair<std::string, int>> errors;
#define CHECKCALL(x) \
    if (x != 0) errors.emplace_back(#x " failed", errno);

InstallRebootSignalHandlers() 为 SIGABRT、SIGBUS 等致命信号（signal，注意不是信号量）安装统一的处理函数，并将 sa_flags 设置为 SA_RESTART；init 进程一旦收到这些信号，处理函数就会调用 InitFatalReboot() 重启系统：

// Installs one shared handler for SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV,
// SIGSTKFLT (where defined), SIGSYS and SIGTRAP. In pid 1 the handler calls
// InitFatalReboot(); in any other process it calls _exit(signal).
void InstallRebootSignalHandlers() {
    // Instead of panic'ing the kernel as is the default behavior when init crashes,
    // we prefer to reboot to bootloader on development builds, as this will prevent
    // boot looping bad configurations and allow both developers and test farms to easily
    // recover.

    struct sigaction action;
    memset(&action, 0, sizeof(action));
    // Full mask: every signal is blocked while the handler is running.
    sigfillset(&action.sa_mask);
    action.sa_handler = [](int signal) {
        // These signal handlers are also caught for processes forked from init, however we do not
        // want them to trigger reboot, so we directly call _exit() for children processes here.
        if (getpid() != 1) {
            _exit(signal);
        }

        // Calling DoReboot() or LOG(FATAL) is not a good option as this is a signal handler.
        // RebootSystem uses syscall() which isn't actually async-signal-safe, but our only option
        // and probably good enough given this is already an error case and only enabled for
        // development builds.
        InitFatalReboot();
    };
    // SA_RESTART is only a sigaction flag; the reboot itself is performed by the
    // handler installed above, not by this flag.
    action.sa_flags = SA_RESTART;
    sigaction(SIGABRT, &action, nullptr);
    sigaction(SIGBUS, &action, nullptr);
    sigaction(SIGFPE, &action, nullptr);
    sigaction(SIGILL, &action, nullptr);
    sigaction(SIGSEGV, &action, nullptr);
#if defined(SIGSTKFLT)
    sigaction(SIGSTKFLT, &action, nullptr);
#endif
    sigaction(SIGSYS, &action, nullptr);
    sigaction(SIGTRAP, &action, nullptr);
}

3.2 FirstStageMain 函数第二部分

    // Clear the umask.
    umask(0);

    CHECKCALL(clearenv());
    CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));
    // Get the basic filesystem setup we need put together in the initramdisk
    // on / and then we'll let the rc file figure out the rest.
    CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));
    CHECKCALL(mkdir("/dev/pts", 0755));
    CHECKCALL(mkdir("/dev/socket", 0755));
    CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));
#define MAKE_STR(x) __STRING(x)
    CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));
#undef MAKE_STR
    // Don't expose the raw commandline to unprivileged processes.
    CHECKCALL(chmod("/proc/cmdline", 0440));
    gid_t groups[] = {AID_READPROC};
    CHECKCALL(setgroups(arraysize(groups), groups));
    CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));
    CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));

    CHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));

    if constexpr (WORLD_WRITABLE_KMSG) {
        CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));
    }

    CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));
    CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));

    // This is needed for log wrapper, which gets called before ueventd runs.
    CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));
    CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));

    // These below mounts are done in first stage init so that first stage mount can mount
    // subdirectories of /mnt/{vendor,product}/.  Other mounts, not required by first stage mount,
    // should be done in rc files.
    // Mount staging areas for devices managed by vold
    // See storage config details at http://source.android.com/devices/storage/
    CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=1000"));
    // /mnt/vendor is used to mount vendor-specific partitions that can not be
    // part of the vendor partition, e.g. because they are mounted read-write.
    CHECKCALL(mkdir("/mnt/vendor", 0755));
    // /mnt/product is used to mount product-specific partitions that can not be
    // part of the product partition, e.g. because they are mounted read-write.
    CHECKCALL(mkdir("/mnt/product", 0755));

    // /apex is used to mount APEXes
    CHECKCALL(mount("tmpfs", "/apex", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=0"));

    // /debug_ramdisk is used to preserve additional files from the debug ramdisk
    CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=0"));
#undef CHECKCALL

这部分内容主要是挂载分区，创建设备节点和一些关键目录。

挂载的文件系统主要有以下四类（此外，代码中还把 selinuxfs 挂载到了 /sys/fs/selinux）：

tmpfs: 一种虚拟内存文件系统，它会将所有的文件存储在内存中。由于 tmpfs 是驻留在 RAM 的，因此它的内容是不持久的。断电后，tmpfs 的内容就消失了，这也是被称作 tmpfs 的根本原因。
devpts: 为伪终端提供了一个标准接口，它的标准挂接点是 /dev/pts。只要 pty(pseudo-tty, 伪终端)的主复用设备 /dev/ptmx 被打开，就会在 /dev/pts 下动态地创建一个新的 pty 设备文件。
proc: 也是一个虚拟文件系统，它可以看作是内核内部数据结构的接口，通过它我们可以获得系统的信息，同时也能够在运行时修改特定的内核参数。
sysfs: 与 proc 文件系统类似，也是一个不占有任何磁盘空间的虚拟文件系统。它通常被挂接在 /sys 目录下。

3.3 FirstStageMain 函数第三部分

    SetStdioToDevNull(argv);

其实现如下：

// Redirects standard input (stdin), standard output (stdout) and standard
// error (stderr) to the special device file "/dev/null".
// argv is only used to initialise logging when /dev/null cannot be opened.
void SetStdioToDevNull(char** argv) {
    // Make stdin/stdout/stderr all point to /dev/null.
    int fd = open("/dev/null", O_RDWR);
    if (fd == -1) {
        // Logging is set up here so the fatal message has somewhere to go.
        // errno is saved and restored around InitLogging, presumably so that
        // PLOG reports the error from open() rather than one set in between.
        int saved_errno = errno;
        android::base::InitLogging(argv, &android::base::KernelLogger, InitAborter);
        errno = saved_errno;
        PLOG(FATAL) << "Couldn't open /dev/null";
    }
    dup2(fd, STDIN_FILENO);
    dup2(fd, STDOUT_FILENO);
    dup2(fd, STDERR_FILENO);
    // Close the extra descriptor unless open() itself returned one of 0/1/2.
    if (fd > STDERR_FILENO) close(fd);
}

这里主要是把 stdin、stdout、stderr 重定向到 /dev/null，也就是说 printf 这类向标准输出/标准错误打印的函数是看不到输出的。

3.4 FirstStageMain 函数第四部分

    // Now that tmpfs is mounted on /dev and we have /dev/kmsg, we can actually
    // talk to the outside world...

    // Android 系统 init 进程启动的时候，log 子系统还没有启动起来，但是我们仍然可以使用 logcat -b kernel 看到 init 进程的日志，这是怎么做到的呢？其实是通过日志重定向来完成的
    InitKernelLogging(argv);
    
    //.....

    LOG(INFO) << "init first stage started!";

Android 系统 init 进程启动的时候，log 子系统还没有启动起来，但是我们仍然可以使用 logcat -b kernel 看到 init 进程的日志，这是怎么做到的呢？其实是通过日志重定向来完成的。

我们先看看 InitKernelLogging 函数的具体实现：

// Routes init's logging to the kernel log: installs KernelLogger as the log
// sink and InitAborter as the abort handler via android::base::InitLogging.
void InitKernelLogging(char** argv) {
    SetFatalRebootTarget();  // empty implementation per the original note; body not shown here
    android::base::InitLogging(argv, &android::base::KernelLogger, InitAborter);
}

接着调用 InitLogging：

// Installs the logger and aborter, then performs one-time setup: the default
// tag is taken from argv[0] and the minimum severity from the ANDROID_LOG_TAGS
// environment variable.
//
// The logger and aborter are replaced on every call; everything after the
// gInitialized check runs only on the first call.
void InitLogging(char* argv[], LogFunction&& logger, AbortFunction&& aborter) {
  SetLogger(std::forward<LogFunction>(logger));
  SetAborter(std::forward<AbortFunction>(aborter));

  if (gInitialized) {
    return;
  }

  gInitialized = true;

  // Stash the command line for later use. We can use /proc/self/cmdline on
  // Linux to recover this, but we don't have that luxury on the Mac/Windows,
  // and there are a couple of argv[0] variants that are commonly used.
  if (argv != nullptr) {
    SetDefaultTag(basename(argv[0]));
  }

  const char* tags = getenv("ANDROID_LOG_TAGS");
  if (tags == nullptr) {
    return;
  }

  // Set gMinimumLogSeverity from the space-separated specs in ANDROID_LOG_TAGS.
  // Only three-character specs of the form "*:<letter>" are accepted; any
  // other spec reaches the LOG(FATAL) at the bottom of the loop.
  std::vector<std::string> specs = Split(tags, " ");
  for (size_t i = 0; i < specs.size(); ++i) {
    // "tag-pattern:[vdiwefs]"
    std::string spec(specs[i]);
    if (spec.size() == 3 && StartsWith(spec, "*:")) {
      switch (spec[2]) {
        case 'v':
          gMinimumLogSeverity = VERBOSE;
          continue;
        case 'd':
          gMinimumLogSeverity = DEBUG;
          continue;
        case 'i':
          gMinimumLogSeverity = INFO;
          continue;
        case 'w':
          gMinimumLogSeverity = WARNING;
          continue;
        case 'e':
          gMinimumLogSeverity = ERROR;
          continue;
        case 'f':
          gMinimumLogSeverity = FATAL_WITHOUT_ABORT;
          continue;
        // liblog will even suppress FATAL if you say 's' for silent, but that's
        // crazy!
        case 's':
          gMinimumLogSeverity = FATAL_WITHOUT_ABORT;
          continue;
      }
    }
    LOG(FATAL) << "unsupported '" << spec << "' in ANDROID_LOG_TAGS (" << tags
               << ")";
  }
}

InitLogging 主要做了两件事：
调用 SetLogger 和 SetAborter 来设置两个变量的值
根据环境变量 ANDROID_LOG_TAGS 的内容，设置 gMinimumLogSeverity 的值

// 调用过程
SetLogger(std::forward<LogFunction>(logger));
SetAborter(std::forward<AbortFunction>(aborter));

// Implementation
// system/core/base/logging.cpp

// Replaces the process-wide log function returned by Logger(), holding
// LoggingLock() while the new function is moved into place.
void SetLogger(LogFunction&& logger) {
  std::lock_guard<std::mutex> lock(LoggingLock());
  Logger() = std::move(logger);
}

// Replaces the process-wide abort function returned by Aborter(), holding
// LoggingLock() while the new function is moved into place.
void SetAborter(AbortFunction&& aborter) {
  std::lock_guard<std::mutex> lock(LoggingLock());
  Aborter() = std::move(aborter);
}

// Returns the process-wide log function. It is created on first use with a
// platform default (LogdLogger when __ANDROID__ is defined, StderrLogger
// otherwise) and can be reassigned through SetLogger().
// The object is allocated with new and never deleted in this block.
static LogFunction& Logger() {
#ifdef __ANDROID__
  static auto& logger = *new LogFunction(LogdLogger());
#else
  static auto& logger = *new LogFunction(StderrLogger);
#endif
  return logger;
}

// Returns the process-wide abort function, initialised to DefaultAborter on
// first use and reassignable through SetAborter().
// The object is allocated with new and never deleted in this block.
static AbortFunction& Aborter() {
  static auto& aborter = *new AbortFunction(DefaultAborter);
  return aborter;
}

可以看出，这里就是给两个静态变量赋值。

我们重点关注下 SetLogger 函数设置的 KernelLogger：

#if defined(__linux__)
// Log function that writes one formatted line, "<level>tag: msg\n", to the file
// descriptor returned by OpenKmsg() (presumably /dev/kmsg; OpenKmsg is not
// shown here). The log id, source file and line arguments are ignored.
void KernelLogger(android::base::LogId, android::base::LogSeverity severity,
                  const char* tag, const char*, unsigned int, const char* msg) {
  // clang-format off
  static constexpr int kLogSeverityToKernelLogLevel[] = {
      [android::base::VERBOSE] = 7,              // KERN_DEBUG (there is no verbose kernel log
                                                 //             level)
      [android::base::DEBUG] = 7,                // KERN_DEBUG
      [android::base::INFO] = 6,                 // KERN_INFO
      [android::base::WARNING] = 4,              // KERN_WARNING
      [android::base::ERROR] = 3,                // KERN_ERROR
      [android::base::FATAL_WITHOUT_ABORT] = 2,  // KERN_CRIT
      [android::base::FATAL] = 2,                // KERN_CRIT
  };
  // clang-format on
  static_assert(arraysize(kLogSeverityToKernelLogLevel) == android::base::FATAL + 1,
                "Mismatch in size of kLogSeverityToKernelLogLevel and values in LogSeverity");

  // Function-local static: OpenKmsg() runs once, on the first call. If it
  // failed, every message is silently dropped.
  static int klog_fd = OpenKmsg();
  if (klog_fd == -1) return;

  int level = kLogSeverityToKernelLogLevel[severity];

  // The kernel's printk buffer is only 1024 bytes.
  // TODO: should we automatically break up long lines into multiple lines?
  // Or we could log but with something like "..." at the end?
  char buf[1024];
  size_t size = snprintf(buf, sizeof(buf), "<%d>%s: %s\n", level, tag, msg);
  // Oversized message: replace it with a short notice carrying its length.
  // NOTE(review): snprintf truncates whenever the returned length is
  // >= sizeof(buf), so size == sizeof(buf) is not caught by this check -- confirm.
  if (size > sizeof(buf)) {
    size = snprintf(buf, sizeof(buf), "<%d>%s: %zu-byte message too long for printk\n",
                    level, tag, size);
  }

  iovec iov[1];
  iov[0].iov_base = buf;
  iov[0].iov_len = size;
  TEMP_FAILURE_RETRY(writev(klog_fd, iov, 1));
}
#endif

KernelLogger 是一个函数，其内部实现就是（通过 OpenKmsg）打开 /dev/kmsg，拿到 fd，然后把日志信息格式化一下，接着通过 writev 把日志写到前面拿到的 fd 中。

在 init 中通常使用 LOG 宏来写日志：

// LOG(severity): log to the DEFAULT destination at the given severity.
#define LOG(severity) LOG_TO(DEFAULT, severity)

// LOG_TO: LOGGING_PREAMBLE(severity) is evaluated first; because of the &&,
// the stream expression on the right is only evaluated when it is true.
#define LOG_TO(dest, severity) LOGGING_PREAMBLE(severity) && LOG_STREAM_TO(dest, severity)

// LOG_STREAM_TO: constructs a temporary LogMessage carrying the call site's
// file and line, and yields its stream() for the caller's << chain.
#define LOG_STREAM_TO(dest, severity)                                           \
  ::android::base::LogMessage(__FILE__, __LINE__, ::android::base::dest,        \
                              SEVERITY_LAMBDA(severity), _LOG_TAG_INTERNAL, -1) \
      .stream()

LOG 宏最终会构建一个 LogMessage 对象，在 LogMessage 的析构函数中会调用上面的 KernelLogger 函数写日志到 /dev/kmsg 中。

// Emits the accumulated message. The text streamed into this LogMessage is
// finalised here, handed to LogLine() one line at a time, and for FATAL
// severity the installed aborter is invoked afterwards.
LogMessage::~LogMessage() {
  // Check severity again. This is duplicate work wrt/ LOG macros, but not LOG_STREAM.
  if (!WOULD_LOG(data_->GetSeverity())) {
    return;
  }

  // Finish constructing the message.
  // An error value of -1 means "no error attached"; otherwise append its
  // strerror() text.
  if (data_->GetError() != -1) {
    data_->GetBuffer() << ": " << strerror(data_->GetError());
  }
  std::string msg(data_->ToString());

  if (data_->GetSeverity() == FATAL) {
#ifdef __ANDROID__
    // Set the bionic abort message early to avoid liblog doing it
    // with the individual lines, so that we get the whole message.
    android_set_abort_message(msg.c_str());
#endif
  }

  {
    // Do the actual logging with the lock held.
    std::lock_guard<std::mutex> lock(LoggingLock());
    if (msg.find('\n') == std::string::npos) {
      // Single-line message: log it as is.
      LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetId(), data_->GetSeverity(),
              data_->GetTag(), msg.c_str());
    } else {
      // Multi-line message: append a final '\n' so every line is terminated,
      // then log each line separately by temporarily NUL-terminating it in place.
      msg += '\n';
      size_t i = 0;
      while (i < msg.size()) {
        size_t nl = msg.find('\n', i);
        msg[nl] = '\0';
        LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetId(), data_->GetSeverity(),
                data_->GetTag(), &msg[i]);
        // Undo the zero-termination so we can give the complete message to the aborter.
        msg[nl] = '\n';
        i = nl + 1;
      }
    }
  }

  // Abort if necessary.
  if (data_->GetSeverity() == FATAL) {
    Aborter()(msg.c_str());
  }
}

代码很多，核心就是调用 LogLine 来打印日志：

// Passes a single line to the currently installed log function (Logger()).
// When tag is null, the default tag is used instead; it is created from
// getprogname() under TagLock() if it has not been set yet.
void LogMessage::LogLine(const char* file, unsigned int line, LogId id, LogSeverity severity,
                         const char* tag, const char* message) {
  if (tag == nullptr) {
    std::lock_guard<std::recursive_mutex> lock(TagLock());
    if (gDefaultTag == nullptr) {
      gDefaultTag = new std::string(getprogname());
    }
    Logger()(id, severity, gDefaultTag->c_str(), file, line, message);
  } else {
    Logger()(id, severity, tag, file, line, message);
  }
}

这里就会通过 Logger() 函数获取到我们前面设置的 KernelLogger，然后调用它，把日志信息写入到 /dev/kmsg 中。

之所以在析构函数中打印日志，是因为 LOG(...) << ... 构造的是一个临时的 LogMessage 对象：等 << 把整条消息拼接完成、该临时对象在语句结束处析构时，才把完整的日志一次性交给 Logger() 输出。

3.5 FirstStageMain 函数第五部分

我们接着看 FirstStageMain 函数第五部分：

    // ......

    /*
     * Mount the partitions that are required this early in boot.
     * As quoted later in this article, DoFirstStageMount() reads the
     * first-stage fstab and mounts the partitions it lists (the original note
     * names /system, /vendor and /odm -- not verifiable from this excerpt).
     * A failure here is fatal.
     */
    if (!DoFirstStageMount()) {
        LOG(FATAL) << "Failed to mount required partitions early ...";
    }

// Free-function entry point for first-stage mounting.
// Returns true when mounting succeeded or was skipped (recovery mode), false
// when the FirstStageMount helper could not be created or its mount failed.
bool DoFirstStageMount() {
    // Skips first stage mount if we're in recovery mode.
    if (IsRecoveryMode()) {
        LOG(INFO) << "First stage mount skipped (recovery mode)";
        return true;
    }

    // Create the mount helper; FirstStageMount::Create() chooses the verified
    // boot (AVB) variant to use.
    std::unique_ptr<FirstStageMount> handle = FirstStageMount::Create();
    if (!handle) {
        LOG(ERROR) << "Failed to create FirstStageMount";
        return false;
    }
    return handle->DoFirstStageMount();
}

先看第一点 FirstStageMount::Create()（即上面代码中的 std::unique_ptr<FirstStageMount> handle = FirstStageMount::Create();），该方法会读取 fstab，并据此创建对应版本的 AVB 校验实现，AVB 校验可以去看看 google 文档：

// Factory: reads the first-stage fstab and returns the FirstStageMount
// implementation that matches the device's verified boot configuration.
std::unique_ptr<FirstStageMount> FirstStageMount::Create() {
    // Read the fstab (file system table); per the original note it lists the
    // logical partitions to mount.
    auto fstab = ReadFirstStageFstab();
    // Per the original note, IsDtVbmetaCompatible() checks whether the device
    // tree has a vbmeta/compatible entry with the value "android,vbmeta".
    // If it returns true, a FirstStageMountVBootV2 instance is created;
    // otherwise a FirstStageMountVBootV1 instance is created.
    if (IsDtVbmetaCompatible(fstab)) {
        return std::make_unique<FirstStageMountVBootV2>(std::move(fstab));
    } else {
        return std::make_unique<FirstStageMountVBootV1>(std::move(fstab));
    }
}

以上主要是创建 V1 或者 V2 版本的 AVB 校验，AVB 校验主要是针对分区进行校验：对于要启动的 Android 版本中包含的所有可执行代码和数据，启动时验证均要求在使用前以加密形式对其进行验证，包括内核（从 boot 分区加载）、设备树（从 dtbo 分区加载）、system 分区和 vendor 分区等。

对于 boot 和 dtbo 这类仅读取一次的小分区，通常是通过将整个内容加载到内存中，然后计算其哈希值来进行验证
内存装不下的较大分区（如文件系统）可能会使用哈希树；
如果在某个时刻计算出的根哈希值与预期根哈希值不一致，系统便不会使用相应数据，无法启动 Android 系统

我们继续看 handle->DoFirstStageMount() 的实现：

// Runs the first-stage mount sequence: initialise devices, create logical
// partitions, then mount the partitions. Returns false as soon as one step
// fails, and true when every step succeeds or there is nothing to mount.
bool FirstStageMount::DoFirstStageMount() {
    // With dm-linear disabled and an empty fstab there is no work to do; this
    // is treated as success.
    if (!IsDmLinearEnabled() && fstab_.empty()) {
        // Nothing to mount.
        LOG(INFO) << "First stage mount skipped (missing/incompatible/empty fstab in device tree)";
        return true;
    }

    if (!InitDevices()) return false;

    if (!CreateLogicalPartitions()) return false;

    if (!MountPartitions()) return false;

    return true;
}

这里的逻辑很清晰，初始化设备，创建逻辑分区，最后挂载分区。

里面具体的实现我们就不再分析了，这个不是我们关注的重点。

3.6 FirstStageMain 函数第六部分

    // .....

    const char* path = "/system/bin/init";
    const char* args[] = {path, "selinux_setup", nullptr};
    execv(path, const_cast<char**>(args));

    // execv() only returns if an error happened, in which case we
    // panic and never fall through this conditional.
    PLOG(FATAL) << "execv(\"" << path << "\") failed";

    return 1;
}

最后部分，通过 execv 带上 selinux_setup 参数再次执行 init 可执行文件（/system/bin/init）。结合前面的 main 函数可知，这一次会进入 selinux_setup 分支并执行 SetupSelinux，init 的第一阶段到此结束。