Fix potential use-after-free bug in reboot

Instead of operating on raw pointers, init now uses name of the
services as it's primary identifier. Only place that still uses
vector<Service*> is StopServices.

In addition, ServiceList::services() function is removed, which should
help avoiding similar bugs in the future.

Bug: 170315126
Bug: 174335499
Test: adb reboot
Test: atest CtsInitTestCases
Change-Id: I73ecd7a8c58c2ec3732934c595b7f7db814b7034
Merged-In: I73ecd7a8c58c2ec3732934c595b7f7db814b7034
(cherry picked from commit bbba64826fdfb7be1f04b7bbb125fbe6d25e5028)
diff --git a/init/lmkd_service.cpp b/init/lmkd_service.cpp
index dd1ab4d..c982925 100644
--- a/init/lmkd_service.cpp
+++ b/init/lmkd_service.cpp
@@ -79,7 +79,7 @@
 }
 
 static void RegisterServices(pid_t exclude_pid) {
-    for (const auto& service : ServiceList::GetInstance().services()) {
+    for (const auto& service : ServiceList::GetInstance()) {
         auto svc = service.get();
         if (svc->oom_score_adjust() != DEFAULT_OOM_SCORE_ADJUST) {
             // skip if process is excluded or not yet forked (pid==0)
diff --git a/init/reboot.cpp b/init/reboot.cpp
index 23a07aa..a4fb08e 100644
--- a/init/reboot.cpp
+++ b/init/reboot.cpp
@@ -85,12 +85,11 @@
 
 static const std::set<std::string> kDebuggingServices{"tombstoned", "logd", "adbd", "console"};
 
-static std::vector<Service*> GetDebuggingServices(bool only_post_data) {
-    std::vector<Service*> ret;
-    ret.reserve(kDebuggingServices.size());
+static std::set<std::string> GetPostDataDebuggingServices() {
+    std::set<std::string> ret;
     for (const auto& s : ServiceList::GetInstance()) {
-        if (kDebuggingServices.count(s->name()) && (!only_post_data || s->is_post_data())) {
-            ret.push_back(s.get());
+        if (kDebuggingServices.count(s->name()) && s->is_post_data()) {
+            ret.insert(s->name());
         }
     }
     return ret;
@@ -503,13 +502,18 @@
 
 // Stops given services, waits for them to be stopped for |timeout| ms.
 // If terminate is true, then SIGTERM is sent to services, otherwise SIGKILL is sent.
-static void StopServices(const std::vector<Service*>& services, std::chrono::milliseconds timeout,
+// Note that services are stopped in order given by |ServiceList::services_in_shutdown_order|
+// function.
+static void StopServices(const std::set<std::string>& services, std::chrono::milliseconds timeout,
                          bool terminate) {
     LOG(INFO) << "Stopping " << services.size() << " services by sending "
               << (terminate ? "SIGTERM" : "SIGKILL");
     std::vector<pid_t> pids;
     pids.reserve(services.size());
-    for (const auto& s : services) {
+    for (const auto& s : ServiceList::GetInstance().services_in_shutdown_order()) {
+        if (services.count(s->name()) == 0) {
+            continue;
+        }
         if (s->pid() > 0) {
             pids.push_back(s->pid());
         }
@@ -529,12 +533,12 @@
 
 // Like StopServices, but also logs all the services that failed to stop after the provided timeout.
 // Returns number of violators.
-static int StopServicesAndLogViolations(const std::vector<Service*>& services,
+static int StopServicesAndLogViolations(const std::set<std::string>& services,
                                         std::chrono::milliseconds timeout, bool terminate) {
     StopServices(services, timeout, terminate);
     int still_running = 0;
-    for (const auto& s : services) {
-        if (s->IsRunning()) {
+    for (const auto& s : ServiceList::GetInstance()) {
+        if (s->IsRunning() && services.count(s->name())) {
             LOG(ERROR) << "[service-misbehaving] : service '" << s->name() << "' is still running "
                        << timeout.count() << "ms after receiving "
                        << (terminate ? "SIGTERM" : "SIGKILL");
@@ -608,8 +612,7 @@
 
     // watchdogd is a vendor specific component but should be alive to complete shutdown safely.
     const std::set<std::string> to_starts{"watchdogd"};
-    std::vector<Service*> stop_first;
-    stop_first.reserve(ServiceList::GetInstance().services().size());
+    std::set<std::string> stop_first;
     for (const auto& s : ServiceList::GetInstance()) {
         if (kDebuggingServices.count(s->name())) {
             // keep debugging tools until non critical ones are all gone.
@@ -627,7 +630,7 @@
                            << "': " << result.error();
             }
         } else {
-            stop_first.push_back(s.get());
+            stop_first.insert(s->name());
         }
     }
 
@@ -690,7 +693,7 @@
         LOG(INFO) << "vold not running, skipping vold shutdown";
     }
     // logcat stopped here
-    StopServices(GetDebuggingServices(false /* only_post_data */), 0ms, false /* SIGKILL */);
+    StopServices(kDebuggingServices, 0ms, false /* SIGKILL */);
     // 4. sync, try umount, and optionally run fsck for user shutdown
     {
         Timer sync_timer;
@@ -779,17 +782,17 @@
         sub_reason = "resetprop";
         return Error() << "Failed to reset sys.powerctl property";
     }
-    std::vector<Service*> stop_first;
+    std::set<std::string> stop_first;
     // Remember the services that were enabled. We will need to manually enable them again otherwise
     // triggers like class_start won't restart them.
-    std::vector<Service*> were_enabled;
-    stop_first.reserve(ServiceList::GetInstance().services().size());
+    std::set<std::string> were_enabled;
     for (const auto& s : ServiceList::GetInstance().services_in_shutdown_order()) {
         if (s->is_post_data() && !kDebuggingServices.count(s->name())) {
-            stop_first.push_back(s);
+            stop_first.insert(s->name());
         }
+        // TODO(ioffe): we should also filter out temporary services here.
         if (s->is_post_data() && s->IsEnabled()) {
-            were_enabled.push_back(s);
+            were_enabled.insert(s->name());
         }
     }
     {
@@ -817,8 +820,9 @@
         sub_reason = "vold_reset";
         return result;
     }
-    if (int r = StopServicesAndLogViolations(GetDebuggingServices(true /* only_post_data */),
-                                             sigkill_timeout, false /* SIGKILL */);
+    const auto& debugging_services = GetPostDataDebuggingServices();
+    if (int r = StopServicesAndLogViolations(debugging_services, sigkill_timeout,
+                                             false /* SIGKILL */);
         r > 0) {
         sub_reason = "sigkill_debug";
         // TODO(b/135984674): store information about offending services for debugging.
@@ -847,9 +851,11 @@
         return false;
     });
     // Re-enable services
-    for (const auto& s : were_enabled) {
-        LOG(INFO) << "Re-enabling service '" << s->name() << "'";
-        s->Enable();
+    for (const auto& s : ServiceList::GetInstance()) {
+        if (were_enabled.count(s->name())) {
+            LOG(INFO) << "Re-enabling service '" << s->name() << "'";
+            s->Enable();
+        }
     }
     ServiceList::GetInstance().ResetState();
     LeaveShutdown();
diff --git a/init/service_list.h b/init/service_list.h
index 3b9018b..555da25 100644
--- a/init/service_list.h
+++ b/init/service_list.h
@@ -66,7 +66,6 @@
 
     auto begin() const { return services_.begin(); }
     auto end() const { return services_.end(); }
-    const std::vector<std::unique_ptr<Service>>& services() const { return services_; }
     const std::vector<Service*> services_in_shutdown_order() const;
 
     void MarkPostData();
diff --git a/init/test_utils/service_utils.cpp b/init/test_utils/service_utils.cpp
index ae68679..6426ed9 100644
--- a/init/test_utils/service_utils.cpp
+++ b/init/test_utils/service_utils.cpp
@@ -44,7 +44,7 @@
     }
 
     ServiceInterfacesMap result;
-    for (const auto& service : service_list.services()) {
+    for (const auto& service : service_list) {
         // Create an entry for all services, including services that may not
         // have any declared interfaces.
         result[service->name()] = service->interfaces();