Create thread pool only during app image loading

Avoid keeping a persistent runtime thread pool that is only used during
app image loading. Instead, create a small temporary pool (at most four
workers) around the image Init call and delete it as soon as the image
has been loaded. This saves RAM.

Bug: 116052292
Bug: 120622973
Test: test-art-host
Change-Id: I79ff75d593e001a6c48d50b95865550a24772078
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 02ab50b..4208477 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -21,6 +21,7 @@
 #include <unistd.h>
 
 #include <random>
+#include <thread>
 
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
@@ -389,12 +390,40 @@
                                                   /*out*/std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     TimingLogger logger(__PRETTY_FUNCTION__, /*precise=*/ true, VLOG_IS_ON(image));
+
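+    // Create a temporary thread pool to speed up copying of the image blocks.
+    // The pool is sized for the available cores (capped at kMaxRuntimeWorkers)
+    // and is deleted as soon as Init() returns, so no worker threads or their
+    // stacks persist after the image has been loaded.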
+    const bool create_thread_pool = true;
+    std::unique_ptr<ThreadPool> thread_pool;
+    if (create_thread_pool) {
+      TimingLogger::ScopedTiming timing("CreateThreadPool", &logger);
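+      // Thread creation can block, so stay in kNative while the workers start
+      // to avoid delaying suspend-all requests from other threads.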
+      ScopedThreadStateChange stsc(Thread::Current(), kNative);
+      constexpr size_t kStackSize = 64 * KB;
+      constexpr size_t kMaxRuntimeWorkers = 4u;
+      const size_t num_workers =
+          std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
+      thread_pool.reset(new ThreadPool("Runtime", num_workers, /*create_peers=*/false, kStackSize));
+      thread_pool->StartWorkers(Thread::Current());
+    }
+
     std::unique_ptr<ImageSpace> space = Init(image_filename,
                                              image_location,
                                              oat_file,
                                              &logger,
+                                             thread_pool.get(),
                                              image_reservation,
                                              error_msg);
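+    // Tear the pool down (joining its workers) immediately so that the worker
+    // threads do not outlive image loading.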
+    if (thread_pool != nullptr) {
+      TimingLogger::ScopedTiming timing("DeleteThreadPool", &logger);
+      ScopedThreadStateChange stsc(Thread::Current(), kNative);
+      thread_pool.reset();
+    }
     if (space != nullptr) {
       TimingLogger::ScopedTiming timing("RelocateImage", &logger);
       ImageHeader* image_header = reinterpret_cast<ImageHeader*>(space->GetMemMap()->Begin());
@@ -437,6 +458,7 @@
                                           const char* image_location,
                                           const OatFile* oat_file,
                                           TimingLogger* logger,
+                                          ThreadPool* thread_pool,
                                           /*inout*/MemMap* image_reservation,
                                           /*out*/std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -525,6 +547,7 @@
         *image_header,
         file->Fd(),
         logger,
+        thread_pool,
         image_reservation,
         error_msg);
     if (!map.IsValid()) {
@@ -587,6 +610,7 @@
                               const ImageHeader& image_header,
                               int fd,
                               TimingLogger* logger,
+                              ThreadPool* pool,
                               /*inout*/MemMap* image_reservation,
                               /*out*/std::string* error_msg) {
     TimingLogger::ScopedTiming timing("MapImageFile", logger);
@@ -631,10 +655,11 @@
       memcpy(map.Begin(), &image_header, sizeof(ImageHeader));
 
       const uint64_t start = NanoTime();
-      ThreadPool* pool = Runtime::Current()->GetThreadPool();
       Thread* const self = Thread::Current();
       const size_t kMinBlocks = 2;
-      const bool use_parallel = pool != nullptr &&image_header.GetBlockCount() >= kMinBlocks;
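+      // The pool may be null (not every caller creates one); in that case, and
+      // when there are too few blocks to split, the blocks are copied serially.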
+      const bool use_parallel = pool != nullptr && image_header.GetBlockCount() >= kMinBlocks;
       for (const ImageHeader::Block& block : image_header.GetBlocks(temp_map.Begin())) {
         auto function = [&](Thread*) {
           const uint64_t start2 = NanoTime();
@@ -1963,6 +1986,7 @@
                         image_location.c_str(),
                         /*oat_file=*/ nullptr,
                         logger,
+                        /*thread_pool=*/ nullptr,
                         image_reservation,
                         error_msg);
   }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 11e0bf4..fc01f9b 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -34,7 +34,6 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
-#include <thread>
 #include <vector>
 
 #include "android-base/strings.h"
@@ -393,11 +392,6 @@
     jit_->DeleteThreadPool();
   }
 
-  // Thread pools must be deleted before the runtime shuts down to avoid hanging.
-  if (thread_pool_ != nullptr) {
-    thread_pool_.reset();
-  }
-
   // Make sure our internal threads are dead before we start tearing down things they're using.
   GetRuntimeCallbacks()->StopDebugger();
   delete signal_catcher_;
@@ -925,15 +919,6 @@
     }
   }
 
-  if (thread_pool_ == nullptr) {
-    constexpr size_t kStackSize = 64 * KB;
-    constexpr size_t kMaxRuntimeWorkers = 4u;
-    const size_t num_workers =
-        std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
-    thread_pool_.reset(new ThreadPool("Runtime", num_workers, /*create_peers=*/false, kStackSize));
-    thread_pool_->StartWorkers(Thread::Current());
-  }
-
   // Create the thread pools.
   heap_->CreateThreadPool();
   // Reset the gc performance data at zygote fork so that the GCs
diff --git a/runtime/runtime.h b/runtime/runtime.h
index b76a658..76cfcd1 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -792,10 +792,6 @@
     return verifier_logging_threshold_ms_;
   }
 
-  ThreadPool* GetThreadPool() {
-    return thread_pool_.get();
-  }
-
  private:
   static void InitPlatformSignalHandlers();
 
@@ -896,9 +892,6 @@
   // Shared linear alloc for now.
   std::unique_ptr<LinearAlloc> linear_alloc_;
 
-  // Thread pool
-  std::unique_ptr<ThreadPool> thread_pool_;
-
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;