Merge "Revert^2 "Linker namespace configuration for the Runtime APEX.""
diff --git a/libdexfile/external/include/art_api/ext_dex_file.h b/libdexfile/external/include/art_api/ext_dex_file.h
index 5f64ab1..4a52a2b 100644
--- a/libdexfile/external/include/art_api/ext_dex_file.h
+++ b/libdexfile/external/include/art_api/ext_dex_file.h
@@ -98,11 +98,11 @@
 // Minimal std::string look-alike for a string returned from libdexfile.
 class DexString final {
  public:
-  DexString(DexString&& dex_str) { ReplaceExtString(std::move(dex_str)); }
+  DexString(DexString&& dex_str) noexcept { ReplaceExtString(std::move(dex_str)); }
   explicit DexString(const char* str = "") : ext_string_(ExtDexFileMakeString(str)) {}
   ~DexString() { ExtDexFileFreeString(ext_string_); }
 
-  DexString& operator=(DexString&& dex_str) {
+  DexString& operator=(DexString&& dex_str) noexcept {
     ReplaceExtString(std::move(dex_str));
     return *this;
   }
@@ -163,7 +163,7 @@
 // thread-safe.
 class DexFile {
  public:
-  DexFile(DexFile&& dex_file) {
+  DexFile(DexFile&& dex_file) noexcept {
     ext_dex_file_ = dex_file.ext_dex_file_;
     dex_file.ext_dex_file_ = nullptr;
   }
diff --git a/libprofile/profile/profile_compilation_info.cc b/libprofile/profile/profile_compilation_info.cc
index 02f6344..9b32b9e 100644
--- a/libprofile/profile/profile_compilation_info.cc
+++ b/libprofile/profile/profile_compilation_info.cc
@@ -58,6 +58,12 @@
 // profile_compilation_info object. All the profile line headers are now placed together
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersionWithCounters[] = { '5', '0', '0', '\0' };
+
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersion) == 4,
+              "Invalid profile version size");
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersionWithCounters) == 4,
+              "Invalid profile version size");
 
 // The name of the profile entry in the dex metadata file.
 // DO NOT CHANGE THIS! (it's similar to classes.dex in the apk files).
@@ -84,18 +90,31 @@
   return kDebugIgnoreChecksum || dex_file_checksum == checksum;
 }
 
+// For storage efficiency we cap aggregation counts at the uint16_t maximum (2^16 - 1).
+static uint16_t IncrementAggregationCounter(uint16_t counter, uint16_t value) {
+  if (counter < (std::numeric_limits<uint16_t>::max() - value)) {
+    return counter + value;
+  } else {
+    return std::numeric_limits<uint16_t>::max();
+  }
+}
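For illustration only (not part of the patch), the helper above saturates rather than wrapping around:

    IncrementAggregationCounter(10u, 5u);       // 15
    IncrementAggregationCounter(65530u, 100u);  // clamps to 65535 (uint16_t max)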
+
 ProfileCompilationInfo::ProfileCompilationInfo(ArenaPool* custom_arena_pool)
     : default_arena_pool_(),
       allocator_(custom_arena_pool),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::ProfileCompilationInfo()
     : default_arena_pool_(),
       allocator_(&default_arena_pool_),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::~ProfileCompilationInfo() {
@@ -326,13 +345,15 @@
 /**
  * Serialization format:
  * [profile_header, zipped[[profile_line_header1, profile_line_header2...],[profile_line_data1,
- *    profile_line_data2...]]]
+ *    profile_line_data2...]],global_aggregation_counter]
  * profile_header:
  *   magic,version,number_of_dex_files,uncompressed_size_of_zipped_data,compressed_data_size
  * profile_line_header:
  *   dex_location,number_of_classes,methods_region_size,dex_location_checksum,num_method_ids
  * profile_line_data:
- *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap
+ *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap,
+ *   num_classes,class_counters,num_methods,method_counters
+ * The aggregation counters are only stored if the profile version is kProfileVersionWithCounters.
  * The method_encoding is:
  *    method_id,number_of_inline_caches,inline_cache1,inline_cache2...
  * The inline_cache is:
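For orientation, a sketch of the counter section assembled from the writer changes below (not text from the patch); it only exists when the version is kProfileVersionWithCounters:

    // Per dex file, appended after the startup/post-startup bitmap:
    //   uint16_t num_classes;                   // class_set.size()
    //   uint16_t class_counters[num_classes];   // in class_set order
    //   uint16_t num_methods;                   // GetNumMethodCounters()
    //   uint16_t method_counters[num_methods];  // one per method with IsInProfile()
    // After all dex files:
    //   uint16_t global_aggregation_counter;    // aggregation_count_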
@@ -355,7 +376,7 @@
   if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic))) {
     return false;
   }
-  if (!WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+  if (!WriteBuffer(fd, version_, sizeof(version_))) {
     return false;
   }
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
@@ -370,7 +391,17 @@
         sizeof(uint16_t) * dex_data.class_set.size() +
         methods_region_size +
         dex_data.bitmap_storage.size();
+    if (StoresAggregationCounters()) {
+      required_capacity += sizeof(uint16_t) +  // num class counters
+          sizeof(uint16_t) * dex_data.class_set.size() +
+          sizeof(uint16_t) +  // num method counters
+          sizeof(uint16_t) * dex_data_ptr->GetNumMethodCounters();
+    }
   }
+  if (StoresAggregationCounters()) {
+    required_capacity += sizeof(uint16_t);  // global counter
+  }
+
   // Allow large profiles for non target builds for the case where we are merging many profiles
   // to generate a boot image profile.
   if (kIsTargetBuild && required_capacity > kProfileSizeErrorThresholdInBytes) {
@@ -443,6 +474,24 @@
     buffer.insert(buffer.end(),
                   dex_data.bitmap_storage.begin(),
                   dex_data.bitmap_storage.end());
+
+    if (StoresAggregationCounters()) {
+      AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+      for (const auto& class_id : dex_data.class_set) {
+        uint16_t type_idx = class_id.index_;
+        AddUintToBuffer(&buffer, dex_data.class_counters[type_idx]);
+      }
+      AddUintToBuffer(&buffer, dex_data.GetNumMethodCounters());
+      for (uint16_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+        if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+          AddUintToBuffer(&buffer, dex_data.method_counters[method_idx]);
+        }
+      }
+    }
+  }
+
+  if (StoresAggregationCounters()) {
+    AddUintToBuffer(&buffer, aggregation_count_);
   }
 
   uint32_t output_size = 0;
@@ -583,7 +632,8 @@
         profile_key,
         checksum,
         profile_index,
-        num_method_ids);
+        num_method_ids,
+        StoresAggregationCounters());
     info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
@@ -943,7 +993,7 @@
   // Read magic and version
   const size_t kMagicVersionSize =
     sizeof(kProfileMagic) +
-    sizeof(kProfileVersion) +
+    kProfileVersionSize +
     sizeof(uint8_t) +  // number of dex files
     sizeof(uint32_t) +  // size of uncompressed profile data
     sizeof(uint32_t);  // size of compressed profile data
@@ -959,10 +1009,18 @@
     *error = "Profile missing magic";
     return kProfileLoadVersionMismatch;
   }
-  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+  if (safe_buffer.CountUnreadBytes() < kProfileVersionSize) {
+    *error = "Cannot read profile version";
+    return kProfileLoadBadData;
+  }
+  memcpy(version_, safe_buffer.GetCurrentPtr(), kProfileVersionSize);
+  safe_buffer.Advance(kProfileVersionSize);
+  if ((memcmp(version_, kProfileVersion, kProfileVersionSize) != 0) &&
+      (memcmp(version_, kProfileVersionWithCounters, kProfileVersionSize) != 0)) {
     *error = "Profile version mismatch";
     return kProfileLoadVersionMismatch;
   }
+
   if (!safe_buffer.ReadUintAndAdvance<uint8_t>(number_of_dex_files)) {
     *error = "Cannot read the number of dex files";
     return kProfileLoadBadData;
@@ -1047,6 +1105,7 @@
     }
   }
 
+  // Read method bitmap.
   const size_t bytes = data->bitmap_storage.size();
   if (buffer.CountUnreadBytes() < bytes) {
     *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
@@ -1055,10 +1114,51 @@
   const uint8_t* base_ptr = buffer.GetCurrentPtr();
   std::copy_n(base_ptr, bytes, data->bitmap_storage.data());
   buffer.Advance(bytes);
-  // Read method bitmap.
+
+  if (StoresAggregationCounters()) {
+    ReadAggregationCounters(buffer, *data, error);
+  }
+
   return kProfileLoadSuccess;
 }
 
+bool ProfileCompilationInfo::ReadAggregationCounters(
+      SafeBuffer& buffer,
+      DexFileData& dex_data,
+      /*out*/std::string* error) {
+  size_t unread_bytes_before_op = buffer.CountUnreadBytes();
+  size_t expected_byte_count = sizeof(uint16_t) *
+      (dex_data.class_set.size() + dex_data.method_map.size() + 2);
+  if (unread_bytes_before_op < expected_byte_count) {
+    *error += "Profile EOF reached prematurely for ReadAggregationCounters";
+    return false;
+  }
+
+  uint16_t num_class_counters;
+  READ_UINT(uint16_t, buffer, num_class_counters, error);
+  if (num_class_counters != dex_data.class_set.size()) {
+    *error = "Invalid class size when reading counters";
+    return false;
+  }
+  for (const auto& class_it : dex_data.class_set) {
+    READ_UINT(uint16_t, buffer, dex_data.class_counters[class_it.index_], error);
+  }
+
+  uint16_t num_method_counters;
+  READ_UINT(uint16_t, buffer, num_method_counters, error);
+  if (num_method_counters != dex_data.GetNumMethodCounters()) {
+    *error = "Invalid method size when reading counters";
+    return false;
+  }
+  for (uint16_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+    if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+      READ_UINT(uint16_t, buffer, dex_data.method_counters[method_idx], error);
+    }
+  }
+
+  return true;
+}
+
 // TODO(calin): Fix this API. ProfileCompilationInfo::Load should be static and
 // return a unique pointer to a ProfileCompilationInfo upon success.
 bool ProfileCompilationInfo::Load(
@@ -1370,9 +1470,17 @@
     }
   }
 
+  if (StoresAggregationCounters()) {
+    if (!uncompressed_data.ReadUintAndAdvance<uint16_t>(&aggregation_count_)) {
+      *error = "Cannot read the global aggregation count";
+      return kProfileLoadBadData;
+    }
+  }
+
   // Check that we read everything and that profiles don't contain junk data.
   if (uncompressed_data.CountUnreadBytes() > 0) {
-    *error = "Unexpected content in the profile file";
+    *error = "Unexpected content in the profile file: " +
+        std::to_string(uncompressed_data.CountUnreadBytes()) + " extra bytes";
     return kProfileLoadBadData;
   } else {
     return kProfileLoadSuccess;
@@ -1518,6 +1626,33 @@
                                                                  other_dex_data->checksum));
     DCHECK(dex_data != nullptr);
 
+    // Merge counters for methods and classes. Must be done before we merge the bitmaps so that
+    // we can tell if the data is new or not.
+    if (StoresAggregationCounters()) {
+      // Class aggregation counters.
+      if (merge_classes) {
+        for (const dex::TypeIndex& type_idx : other_dex_data->class_set) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->class_counters[type_idx.index_]
+              : (dex_data->ContainsClass(type_idx) ? 1 : 0);
+
+          dex_data->class_counters[type_idx.index_] =
+              IncrementAggregationCounter(dex_data->class_counters[type_idx.index_], amount);
+        }
+      }
+
+      // Method aggregation counters.
+      for (uint16_t method_idx = 0; method_idx < other_dex_data->num_method_ids; method_idx++) {
+        if (other_dex_data->GetHotnessInfo(method_idx).IsInProfile()) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->method_counters[method_idx]
+              : (dex_data->GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0);
+          dex_data->method_counters[method_idx] =
+              IncrementAggregationCounter(dex_data->method_counters[method_idx], amount);
+        }
+      }
+    }
+
     // Merge the classes.
     if (merge_classes) {
       dex_data->class_set.insert(other_dex_data->class_set.begin(),
@@ -1552,6 +1687,13 @@
     // Merge the method bitmaps.
     dex_data->MergeBitmap(*other_dex_data);
   }
+
+  // Global aggregation counter.
+  if (StoresAggregationCounters()) {
+    uint16_t amount = other.StoresAggregationCounters() ? other.aggregation_count_ : 1;
+    aggregation_count_ = IncrementAggregationCounter(aggregation_count_, amount);
+  }
+
   return true;
 }
 
@@ -1614,11 +1756,7 @@
 
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
   const DexFileData* dex_data = FindDexData(&dex_file);
-  if (dex_data != nullptr) {
-    const ArenaSet<dex::TypeIndex>& classes = dex_data->class_set;
-    return classes.find(type_idx) != classes.end();
-  }
-  return false;
+  return (dex_data != nullptr) && dex_data->ContainsClass(type_idx);
 }
 
 uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
@@ -1753,6 +1891,9 @@
 bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
   // No need to compare profile_key_map_. That's only a cache for fast search.
   // All the information is already in the info_ vector.
+  if (memcmp(version_, other.version_, kProfileVersionSize) != 0) {
+    return false;
+  }
   if (info_.size() != other.info_.size()) {
     return false;
   }
@@ -1763,6 +1904,9 @@
       return false;
     }
   }
+  if (aggregation_count_ != other.aggregation_count_) {
+    return false;
+  }
   return true;
 }
 
@@ -1965,9 +2109,8 @@
   SetMethodHotness(index, flags);
 
   if ((flags & MethodHotness::kFlagHot) != 0) {
-    method_map.FindOrAdd(
-        index,
-        InlineCacheMap(std::less<uint16_t>(), allocator_->Adapter(kArenaAllocProfile)));
+    ProfileCompilationInfo::InlineCacheMap* result = FindOrAddMethod(index);
+    DCHECK(result != nullptr);
   }
   return true;
 }
@@ -2000,6 +2143,43 @@
   return ret;
 }
 
+int32_t ProfileCompilationInfo::DexFileData::GetMethodAggregationCounter(
+      uint16_t method_idx) const {
+  CHECK_GT(method_counters.size(), method_idx) << "Profile not prepared for aggregation counters";
+  if (!GetHotnessInfo(method_idx).IsInProfile()) {
+    return -1;
+  }
+
+  return method_counters[method_idx];
+}
+
+int32_t ProfileCompilationInfo::DexFileData::GetClassAggregationCounter(uint16_t type_idx) const {
+  CHECK_GT(class_counters.size(), type_idx) << "Profile not prepared for aggregation counters";
+  if (!ContainsClass(dex::TypeIndex(type_idx))) {
+    return -1;
+  }
+
+  return class_counters[type_idx];
+}
+
+int32_t ProfileCompilationInfo::GetMethodAggregationCounter(
+      const MethodReference& method_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(method_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetMethodAggregationCounter(method_ref.index);
+}
+
+int32_t ProfileCompilationInfo::GetClassAggregationCounter(const TypeReference& type_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(type_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetClassAggregationCounter(type_ref.index);
+}
+
+uint16_t ProfileCompilationInfo::GetAggregationCounter() const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  return aggregation_count_;
+}
+
 ProfileCompilationInfo::DexPcData*
 ProfileCompilationInfo::FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc) {
   return &(inline_cache->FindOrAdd(dex_pc, DexPcData(&allocator_))->second);
@@ -2096,4 +2276,46 @@
   profile_key_map_.clear();
 }
 
+bool ProfileCompilationInfo::StoresAggregationCounters() const {
+  return memcmp(version_, kProfileVersionWithCounters, sizeof(kProfileVersionWithCounters)) == 0;
+}
+
+void ProfileCompilationInfo::PrepareForAggregationCounters() {
+  InitProfileVersionInternal(kProfileVersionWithCounters);
+  for (DexFileData* dex_data : info_) {
+    dex_data->PrepareForAggregationCounters();
+  }
+}
+
+void ProfileCompilationInfo::DexFileData::PrepareForAggregationCounters() {
+  method_counters.resize(num_method_ids);
+  // TODO(calin): we should store the maximum number of types in the profile.
+  // It will simplify quite a few things and make this storage allocation
+  // more efficient.
+  size_t max_elems = 1 << (kBitsPerByte * sizeof(uint16_t));
+  class_counters.resize(max_elems);
+}
+
+const uint8_t* ProfileCompilationInfo::GetVersion() const {
+  return version_;
+}
+
+void ProfileCompilationInfo::InitProfileVersionInternal(const uint8_t version[]) {
+  CHECK(
+      (memcmp(version, kProfileVersion, kProfileVersionSize) == 0) ||
+      (memcmp(version, kProfileVersionWithCounters, kProfileVersionSize) == 0));
+  memcpy(version_, version, kProfileVersionSize);
+}
+
+uint16_t ProfileCompilationInfo::DexFileData::GetNumMethodCounters() const {
+  uint16_t num_method_counters = 0;
+  for (uint16_t method_idx = 0; method_idx < num_method_ids; method_idx++) {
+    num_method_counters += GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0;
+  }
+  return num_method_counters;
+}
+
+bool ProfileCompilationInfo::DexFileData::ContainsClass(const dex::TypeIndex type_index) const {
+  return class_set.find(type_index) != class_set.end();
+}
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info.h b/libprofile/profile/profile_compilation_info.h
index 92fa098..fa4615b 100644
--- a/libprofile/profile/profile_compilation_info.h
+++ b/libprofile/profile/profile_compilation_info.h
@@ -73,9 +73,10 @@
  public:
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
-
+  static const uint8_t kProfileVersionWithCounters[];
   static const char kDexMetadataProfileEntry[];
 
+  static constexpr size_t kProfileVersionSize = 4;
   static constexpr uint8_t kIndividualInlineCacheSize = 5;
 
   // Data structures for encoding the offline representation of inline caches.
@@ -447,6 +448,30 @@
   // Clears all the data from the profile.
   void ClearData();
 
+  // Prepare the profile to store aggregation counters.
+  // This will change the profile version and allocate extra storage for the counters.
+  // It allocates 2 bytes for every possible method and class, so avoid using it in
+  // memory-sensitive or performance-critical code.
+  void PrepareForAggregationCounters();
+
+  // Returns true if the profile is configured to store aggregation counters.
+  bool StoresAggregationCounters() const;
+
+  // Returns the aggregation counter for the given method.
+  // Returns -1 if the method is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetMethodAggregationCounter(const MethodReference& method_ref) const;
+  // Returns the aggregation counter for the given class.
+  // Returns -1 if the class is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetClassAggregationCounter(const TypeReference& type_ref) const;
+  // Returns the number of times the profile was merged.
+  // CHECKs that the profile is configured to store aggregation counters.
+  uint16_t GetAggregationCounter() const;
+
+  // Return the version of this profile.
+  const uint8_t* GetVersion() const;
+
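A hedged usage sketch of the new public API documented above (names come from this patch; `other` and `method_ref` are placeholders):

    ProfileCompilationInfo info;
    info.PrepareForAggregationCounters();  // switches the version to kProfileVersionWithCounters
    info.MergeWith(other);                 // merges bump the per-method/class and global counters
    if (info.StoresAggregationCounters()) {
      uint16_t merges = info.GetAggregationCounter();
      int32_t count = info.GetMethodAggregationCounter(method_ref);  // -1 if not in the profile
    }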
  private:
   enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
@@ -470,7 +495,8 @@
                 const std::string& key,
                 uint32_t location_checksum,
                 uint16_t index,
-                uint32_t num_methods)
+                uint32_t num_methods,
+                bool store_aggregation_counters)
         : allocator_(allocator),
           profile_key(key),
           profile_index(index),
@@ -478,13 +504,18 @@
           method_map(std::less<uint16_t>(), allocator->Adapter(kArenaAllocProfile)),
           class_set(std::less<dex::TypeIndex>(), allocator->Adapter(kArenaAllocProfile)),
           num_method_ids(num_methods),
-          bitmap_storage(allocator->Adapter(kArenaAllocProfile)) {
+          bitmap_storage(allocator->Adapter(kArenaAllocProfile)),
+          method_counters(allocator->Adapter(kArenaAllocProfile)),
+          class_counters(allocator->Adapter(kArenaAllocProfile)) {
       bitmap_storage.resize(ComputeBitmapStorage(num_method_ids));
       if (!bitmap_storage.empty()) {
         method_bitmap =
             BitMemoryRegion(MemoryRegion(
                 &bitmap_storage[0], bitmap_storage.size()), 0, ComputeBitmapBits(num_method_ids));
       }
+      if (store_aggregation_counters) {
+        PrepareForAggregationCounters();
+      }
     }
 
     static size_t ComputeBitmapBits(uint32_t num_method_ids) {
@@ -495,7 +526,13 @@
     }
 
     bool operator==(const DexFileData& other) const {
-      return checksum == other.checksum && method_map == other.method_map;
+      return checksum == other.checksum &&
+          num_method_ids == other.num_method_ids &&
+          method_map == other.method_map &&
+          class_set == other.class_set &&
+          (BitMemoryRegion::Compare(method_bitmap, other.method_bitmap) == 0) &&
+          class_counters == other.class_counters &&
+          method_counters == other.method_counters;
     }
 
     // Mark a method as executed at least once.
@@ -510,6 +547,14 @@
 
     void SetMethodHotness(size_t index, MethodHotness::Flag flags);
     MethodHotness GetHotnessInfo(uint32_t dex_method_index) const;
+    void PrepareForAggregationCounters();
+
+    int32_t GetMethodAggregationCounter(uint16_t method_index) const;
+    int32_t GetClassAggregationCounter(uint16_t type_index) const;
+
+    uint16_t GetNumMethodCounters() const;
+
+    bool ContainsClass(const dex::TypeIndex type_index) const;
 
     // The allocator used to allocate new inline cache maps.
     ArenaAllocator* const allocator_;
@@ -519,7 +564,7 @@
     uint8_t profile_index;
     // The dex checksum.
     uint32_t checksum;
-    // The methonds' profile information.
+    // The methods' profile information.
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
@@ -531,6 +576,8 @@
     uint32_t num_method_ids;
     ArenaVector<uint8_t> bitmap_storage;
     BitMemoryRegion method_bitmap;
+    ArenaVector<uint16_t> method_counters;
+    ArenaVector<uint16_t> class_counters;
 
    private:
     enum BitmapIndex {
@@ -761,6 +808,11 @@
                    const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
                    /*out*/std::string* error);
 
+  // Read the aggregation counters from the buffer.
+  bool ReadAggregationCounters(SafeBuffer& buffer,
+                               DexFileData& dex_data,
+                               /*out*/std::string* error);
+
   // The method generates mapping of profile indices while merging a new profile
   // data into current data. It returns true, if the mapping was successful.
   bool RemapProfileIndex(const std::vector<ProfileLineHeader>& profile_line_headers,
@@ -792,6 +844,9 @@
   // if no previous data exists.
   DexPcData* FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc);
 
+  // Initializes the profile version to the desired one.
+  void InitProfileVersionInternal(const uint8_t version[]);
+
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
@@ -809,6 +864,14 @@
   // This is used to speed up searches since it avoids iterating
   // over the info_ vector when searching by profile key.
   ArenaSafeMap<const std::string, uint8_t> profile_key_map_;
+
+  // The version of the profile.
+  // This may change if a "normal" profile is transformed to keep track
+  // of aggregation counters.
+  uint8_t version_[kProfileVersionSize];
+
+  // Stored only when the profile is configured to keep track of aggregation counters.
+  uint16_t aggregation_count_;
 };
 
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info_test.cc b/libprofile/profile/profile_compilation_info_test.cc
index a2bfe50..47019c4 100644
--- a/libprofile/profile/profile_compilation_info_test.cc
+++ b/libprofile/profile/profile_compilation_info_test.cc
@@ -1141,4 +1141,180 @@
   ASSERT_TRUE(loaded_info.Equals(info));
 }
 
+TEST_F(ProfileCompilationInfoTest, PrepareForAggregationCounters) {
+  ProfileCompilationInfo info;
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersion,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+
+  info.PrepareForAggregationCounters();
+
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersionWithCounters,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+  ASSERT_TRUE(info.StoresAggregationCounters());
+  ASSERT_EQ(info.GetAggregationCounter(), 0);
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  ASSERT_EQ(info1.GetAggregationCounter(), 0);
+  info1.MergeWith(info2);
+  ASSERT_EQ(info1.GetAggregationCounter(), 1);
+  info1.MergeWith(info3);
+  ASSERT_EQ(info1.GetAggregationCounter(), 2);
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+
+  // Check methods that do not exist.
+  ASSERT_EQ(-1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 4)));
+  ASSERT_EQ(-1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(40))));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveAndLoadAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info3);
+
+  ScratchFile profile;
+
+  ASSERT_TRUE(info1.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(info1));
+
+  ASSERT_EQ(2, loaded_info.GetAggregationCounter());
+
+  ASSERT_EQ(0, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeTwoWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+  AddClass(location, checksum, dex::TypeIndex(10), &info2);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info2);
+  ASSERT_EQ(2, info1.GetAggregationCounter());
+
+  // Save and load the profile to create a copy of the data
+  ScratchFile profile;
+  info1.Save(GetFd(profile));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  profile.GetFile()->ResetOffset();
+  loaded_info.Load(GetFd(profile));
+
+  // Merge the data
+  info1.MergeWith(loaded_info);
+
+  ASSERT_EQ(4, info1.GetAggregationCounter());
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(4, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+
+  ASSERT_EQ(4, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+}
+
 }  // namespace art
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index 4dc5262..b65bb43 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -32,7 +32,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
         const ScopedFlock& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK(!profile_files.empty());
 
   ProfileCompilationInfo info;
@@ -42,6 +43,12 @@
     return kErrorBadProfiles;
   }
 
+  // If we need to store aggregation counters (e.g. for the boot image profile),
+  // prepare the reference profile now.
+  if (store_aggregation_counters) {
+    info.PrepareForAggregationCounters();
+  }
+
   // Store the current state of the reference profile before merging with the current profiles.
   uint32_t number_of_methods = info.GetNumberOfMethods();
   uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
@@ -124,7 +131,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<int>& profile_files_fd,
         int reference_profile_file_fd,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK_GE(reference_profile_file_fd, 0);
 
   std::string error;
@@ -147,13 +155,15 @@
 
   return ProcessProfilesInternal(profile_files.Get(),
                                  reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<std::string>& profile_files,
         const std::string& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   std::string error;
 
   ScopedFlockList profile_files_list(profile_files.size());
@@ -171,7 +181,8 @@
 
   return ProcessProfilesInternal(profile_files_list.Get(),
                                  locked_reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 }  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
index c1d6f8e..45d4e38 100644
--- a/profman/profile_assistant.h
+++ b/profman/profile_assistant.h
@@ -55,19 +55,22 @@
       const std::vector<std::string>& profile_files,
       const std::string& reference_profile_file,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
   static ProcessingResult ProcessProfiles(
       const std::vector<int>& profile_files_fd_,
       int reference_profile_file_fd,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
  private:
   static ProcessingResult ProcessProfilesInternal(
       const std::vector<ScopedFlock>& profile_files,
       const ScopedFlock& reference_profile_file,
-      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn);
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+      bool store_aggregation_counters);
 
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index e9d3290..e906151 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -102,7 +102,7 @@
       }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1,
+      ASSERT_TRUE(info->AddClassIndex(ProfileCompilationInfo::GetProfileDexFileKey(dex_location1),
                                       dex_location_checksum1,
                                       dex::TypeIndex(i),
                                       number_of_methods1));
@@ -1300,4 +1300,57 @@
   }
 }
 
+TEST_F(ProfileAssistantTest, MergeProfilesWithCounters) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  // The new profile info will contain methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  const uint16_t kNumberOfClasses = 50;
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+  const DexFile& d1 = *dex_files[0];
+  const DexFile& d2 = *dex_files[1];
+  ProfileCompilationInfo info1;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile2, &info2);
+
+  std::string profman_cmd = GetProfmanCmd();
+  std::vector<std::string> argv_str;
+  argv_str.push_back(profman_cmd);
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile1.GetFd()));
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile2.GetFd()));
+  argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile.GetFd()));
+  argv_str.push_back("--store-aggregation-counters");
+  std::string error;
+
+  EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0) << error;
+
+  // Verify that we can load the result and that the counters are in place.
+
+  ProfileCompilationInfo result;
+  result.PrepareForAggregationCounters();
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile.GetFd()));
+
+  ASSERT_TRUE(result.StoresAggregationCounters());
+  ASSERT_EQ(2, result.GetAggregationCounter());
+
+  for (uint16_t i = 0; i < kNumberOfMethodsToEnableCompilation; i++) {
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d1, i)));
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d2, i)));
+  }
+  for (uint16_t i = 0; i < kNumberOfClasses; i++) {
+    ASSERT_EQ(1, result.GetClassAggregationCounter(TypeReference(&d1, dex::TypeIndex(i))));
+  }
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 2935a05..a0c387d 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -157,6 +157,9 @@
   UsageError("      the file passed with --profile-fd(file) to the profile passed with");
   UsageError("      --reference-profile-fd(file) and update at the same time the profile-key");
   UsageError("      of entries corresponding to the apks passed with --apk(-fd).");
+  UsageError("  --store-aggregation-counters: if present, profman will compute and store");
+  UsageError("      the aggregation counters of classes and methods in the output profile.");
+  UsageError("      In this case the profile will have a different version.");
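A hypothetical invocation combining the new flag with the existing profile options (paths are placeholders):

    profman --profile-file=cur1.prof --profile-file=cur2.prof \
            --reference-profile-file=ref.prof \
            --store-aggregation-counters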
   UsageError("");
 
   exit(EXIT_FAILURE);
@@ -200,7 +203,8 @@
       test_profile_class_percentage_(kDefaultTestProfileClassPercentage),
       test_profile_seed_(NanoTime()),
       start_ns_(NanoTime()),
-      copy_and_update_profile_key_(false) {}
+      copy_and_update_profile_key_(false),
+      store_aggregation_counters_(false) {}
 
   ~ProfMan() {
     LogCompletionTime();
@@ -287,6 +291,8 @@
         ParseUintOption(option, "--generate-test-profile-seed", &test_profile_seed_, Usage);
       } else if (option.starts_with("--copy-and-update-profile-key")) {
         copy_and_update_profile_key_ = true;
+      } else if (option.starts_with("--store-aggregation-counters")) {
+        store_aggregation_counters_ = true;
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -363,12 +369,14 @@
       File file(reference_profile_file_fd_, false);
       result = ProfileAssistant::ProcessProfiles(profile_files_fd_,
                                                  reference_profile_file_fd_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
       CloseAllFds(profile_files_fd_, "profile_files_fd_");
     } else {
       result = ProfileAssistant::ProcessProfiles(profile_files_,
                                                  reference_profile_file_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
     }
     return result;
   }
@@ -1279,6 +1287,7 @@
   uint32_t test_profile_seed_;
   uint64_t start_ns_;
   bool copy_and_update_profile_key_;
+  bool store_aggregation_counters_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.
diff --git a/runtime/base/locks.cc b/runtime/base/locks.cc
index cfc9f1d..a7922a2 100644
--- a/runtime/base/locks.cc
+++ b/runtime/base/locks.cc
@@ -61,6 +61,7 @@
 Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::runtime_thread_pool_lock_ = nullptr;
 Mutex* Locks::cha_lock_ = nullptr;
 Mutex* Locks::subtype_check_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
@@ -154,6 +155,7 @@
     DCHECK(user_code_suspension_lock_ != nullptr);
     DCHECK(dex_lock_ != nullptr);
     DCHECK(native_debug_interface_lock_ != nullptr);
+    DCHECK(runtime_thread_pool_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -189,6 +191,10 @@
     DCHECK(runtime_shutdown_lock_ == nullptr);
     runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kRuntimeThreadPoolLock);
+    DCHECK(runtime_thread_pool_lock_ == nullptr);
+    runtime_thread_pool_lock_ = new Mutex("runtime thread pool lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
     DCHECK(profiler_lock_ == nullptr);
     profiler_lock_ = new Mutex("profiler lock", current_lock_level);
diff --git a/runtime/base/locks.h b/runtime/base/locks.h
index 8cbe372..57719f1 100644
--- a/runtime/base/locks.h
+++ b/runtime/base/locks.h
@@ -117,6 +117,7 @@
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
+  kRuntimeThreadPoolLock,
   kRuntimeShutdownLock,
   kTraceLock,
   kHeapBitmapLock,
@@ -224,8 +225,11 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
+  // Runtime thread pool lock.
+  static Mutex* runtime_thread_pool_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
   // Guards background profiler global state.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_thread_pool_lock_);
 
   // Guards trace (ie traceview) requests.
   static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bf8aaae..8f9967f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1416,6 +1416,11 @@
   TrimSpaces(self);
   // Trim arenas that may have been used by JIT or verifier.
   runtime->GetArenaPool()->TrimMaps();
+  {
+    // TODO: Move this to a callback called when startup is finished (b/120671223).
+    ScopedTrace trace2("Delete thread pool");
+    runtime->DeleteThreadPool();
+  }
 }
 
 class TrimIndirectReferenceTableClosure : public Closure {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 66db063..4f9b3f9 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -21,7 +21,6 @@
 #include <unistd.h>
 
 #include <random>
-#include <thread>
 
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
@@ -685,40 +684,12 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     TimingLogger logger(__PRETTY_FUNCTION__, /*precise=*/ true, VLOG_IS_ON(image));
 
-    std::unique_ptr<ThreadPool> thread_pool;
     std::unique_ptr<ImageSpace> space = Init(image_filename,
                                              image_location,
                                              oat_file,
                                              &logger,
-                                             &thread_pool,
                                              image_reservation,
                                              error_msg);
-    if (thread_pool != nullptr) {
-      // Delay the thread pool deletion to prevent the deletion slowing down the startup by causing
-      // preemption. TODO: Just do this in heap trim.
-      static constexpr uint64_t kThreadPoolDeleteDelay = MsToNs(5000);
-
-      class DeleteThreadPoolTask : public HeapTask {
-       public:
-        explicit DeleteThreadPoolTask(std::unique_ptr<ThreadPool>&& thread_pool)
-            : HeapTask(NanoTime() + kThreadPoolDeleteDelay), thread_pool_(std::move(thread_pool)) {}
-
-        void Run(Thread* self) override {
-          ScopedTrace trace("DestroyThreadPool");
-          ScopedThreadStateChange stsc(self, kNative);
-          thread_pool_.reset();
-        }
-
-       private:
-        std::unique_ptr<ThreadPool> thread_pool_;
-      };
-      gc::TaskProcessor* const processor = Runtime::Current()->GetHeap()->GetTaskProcessor();
-      // The thread pool is already done being used since Init has finished running. Deleting the
-      // thread pool is done async since it takes a non-trivial amount of time to do.
-      if (processor != nullptr) {
-        processor->AddTask(Thread::Current(), new DeleteThreadPoolTask(std::move(thread_pool)));
-      }
-    }
     if (space != nullptr) {
       uint32_t expected_reservation_size =
           RoundUp(space->GetImageHeader().GetImageSize(), kPageSize);
@@ -779,7 +750,6 @@
                                           const char* image_location,
                                           const OatFile* oat_file,
                                           TimingLogger* logger,
-                                          std::unique_ptr<ThreadPool>* thread_pool,
                                           /*inout*/MemMap* image_reservation,
                                           /*out*/std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -856,18 +826,6 @@
       return nullptr;
     }
 
-    const size_t kMinBlocks = 2;
-    if (thread_pool != nullptr && image_header->GetBlockCount() >= kMinBlocks) {
-      TimingLogger::ScopedTiming timing("CreateThreadPool", logger);
-      ScopedThreadStateChange stsc(Thread::Current(), kNative);
-      constexpr size_t kStackSize = 64 * KB;
-      constexpr size_t kMaxRuntimeWorkers = 4u;
-      const size_t num_workers =
-          std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
-      thread_pool->reset(new ThreadPool("Image", num_workers, /*create_peers=*/false, kStackSize));
-      thread_pool->get()->StartWorkers(Thread::Current());
-    }
-
     // GetImageBegin is the preferred address to map the image. If we manage to map the
     // image at the image begin, the amount of fixup work required is minimized.
     // If it is pic we will retry with error_msg for the failure case. Pass a null error_msg to
@@ -880,7 +838,6 @@
         *image_header,
         file->Fd(),
         logger,
-        thread_pool != nullptr ? thread_pool->get() : nullptr,
         image_reservation,
         error_msg);
     if (!map.IsValid()) {
@@ -971,7 +928,6 @@
                               const ImageHeader& image_header,
                               int fd,
                               TimingLogger* logger,
-                              ThreadPool* pool,
                               /*inout*/MemMap* image_reservation,
                               /*out*/std::string* error_msg) {
     TimingLogger::ScopedTiming timing("MapImageFile", logger);
@@ -1015,9 +971,12 @@
       }
       memcpy(map.Begin(), &image_header, sizeof(ImageHeader));
 
+      Runtime::ScopedThreadPoolUsage stpu;
+      ThreadPool* const pool = stpu.GetThreadPool();
       const uint64_t start = NanoTime();
       Thread* const self = Thread::Current();
-      const bool use_parallel = pool != nullptr;
+      static constexpr size_t kMinBlocks = 2u;
+      const bool use_parallel = pool != nullptr && image_header.GetBlockCount() >= kMinBlocks;
       for (const ImageHeader::Block& block : image_header.GetBlocks(temp_map.Begin())) {
         auto function = [&](Thread*) {
           const uint64_t start2 = NanoTime();
@@ -1915,7 +1874,6 @@
                         image_location.c_str(),
                         /*oat_file=*/ nullptr,
                         logger,
-                        /*thread_pool=*/ nullptr,
                         image_reservation,
                         error_msg);
   }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index a633a63..d1896e6 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -44,7 +44,7 @@
 #include "handle_scope-inl.h"
 #include "interpreter_mterp_impl.h"
 #include "interpreter_switch_impl.h"
-#include "jit/jit.h"
+#include "jit/jit-inl.h"
 #include "mirror/call_site.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
diff --git a/runtime/interpreter/interpreter_switch_impl-inl.h b/runtime/interpreter/interpreter_switch_impl-inl.h
index 94cb3de..aec2aa2 100644
--- a/runtime/interpreter/interpreter_switch_impl-inl.h
+++ b/runtime/interpreter/interpreter_switch_impl-inl.h
@@ -26,7 +26,7 @@
 #include "dex/dex_instruction_list.h"
 #include "experimental_flags.h"
 #include "interpreter_common.h"
-#include "jit/jit.h"
+#include "jit/jit-inl.h"
 #include "jvalue-inl.h"
 #include "mirror/string-alloc-inl.h"
 #include "nth_caller_visitor.h"
diff --git a/runtime/jit/jit-inl.h b/runtime/jit/jit-inl.h
new file mode 100644
index 0000000..80324ad
--- /dev/null
+++ b/runtime/jit/jit-inl.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_JIT_INL_H_
+#define ART_RUNTIME_JIT_JIT_INL_H_
+
+#include "jit/jit.h"
+
+#include "art_method.h"
+#include "base/bit_utils.h"
+#include "runtime-inl.h"
+#include "thread.h"
+
+namespace art {
+namespace jit {
+
+inline bool Jit::ShouldUsePriorityThreadWeight(Thread* self) {
+  return self->IsJitSensitiveThread() && Runtime::Current()->InJankPerceptibleProcessState();
+}
+
+inline void Jit::AddSamples(Thread* self,
+                            ArtMethod* method,
+                            uint16_t samples,
+                            bool with_backedges) {
+  if (Jit::ShouldUsePriorityThreadWeight(self)) {
+    samples *= PriorityThreadWeight();
+  }
+  uint32_t old_count = method->GetCounter();
+  uint32_t new_count = old_count + samples;
+
+  // The full check is fairly expensive so we just add to hotness most of the time,
+  // and we do the full check only when some of the higher bits of the count change.
+  // NB: The method needs to see the transitions of the counter past the thresholds.
+  uint32_t old_batch = RoundDown(old_count, kJitSamplesBatchSize);  // Clear lower bits.
+  uint32_t new_batch = RoundDown(new_count, kJitSamplesBatchSize);  // Clear lower bits.
+  if (UNLIKELY(old_batch == 0)) {
+    // For low sample counts, we check every time (which is important for tests).
+    if (!MaybeCompileMethod(self, method, old_count, new_count, with_backedges)) {
+      // Tests may check that the counter is 0 for methods that we never compile.
+      return;  // Ignore the samples for now and retry later.
+    }
+  } else if (UNLIKELY(old_batch != new_batch)) {
+    // For high sample counts, we check only when we move past the batch boundary.
+    if (!MaybeCompileMethod(self, method, old_batch, new_batch, with_backedges)) {
+      // OSR compilation will ignore the samples if they don't have backedges.
+      return;  // Ignore the samples for now and retry later.
+    }
+  }
+
+  method->SetCounter(new_count);
+}
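A short worked example of the batching above, assuming kJitSamplesBatchSize is 32 (an assumed value; the constant is not shown in this hunk):

    // old_count = 40, samples = 5  -> new_count = 45
    //   old_batch = 32, new_batch = 32: equal and non-zero, no check; just SetCounter(45).
    // old_count = 60, samples = 10 -> new_count = 70
    //   old_batch = 32, new_batch = 64: batches differ, so MaybeCompileMethod(self, method, 32, 64,
    //   with_backedges) runs and may enqueue a compile task before the counter is updated.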
+
+}  // namespace jit
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_JIT_INL_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 03c97f4..d44bd59 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -28,6 +28,7 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
+#include "jit-inl.h"
 #include "jit_code_cache.h"
 #include "jni/java_vm_ext.h"
 #include "mirror/method_handle_impl.h"
@@ -68,6 +69,14 @@
 };
 DEFINE_RUNTIME_DEBUG_FLAG(StressModeHelper, kSlowMode);
 
+uint32_t JitOptions::RoundUpThreshold(uint32_t threshold) {
+  if (threshold > kJitSamplesBatchSize) {
+    threshold = RoundUp(threshold, kJitSamplesBatchSize);
+  }
+  CHECK_LE(threshold, std::numeric_limits<uint16_t>::max());
+  return threshold;
+}
+
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation);
@@ -93,30 +102,25 @@
                    : kJitStressDefaultCompileThreshold)
             : kJitDefaultCompileThreshold;
   }
-  if (jit_options->compile_threshold_ > std::numeric_limits<uint16_t>::max()) {
-    LOG(FATAL) << "Method compilation threshold is above its internal limit.";
-  }
+  jit_options->compile_threshold_ = RoundUpThreshold(jit_options->compile_threshold_);
 
   if (options.Exists(RuntimeArgumentMap::JITWarmupThreshold)) {
     jit_options->warmup_threshold_ = *options.Get(RuntimeArgumentMap::JITWarmupThreshold);
-    if (jit_options->warmup_threshold_ > std::numeric_limits<uint16_t>::max()) {
-      LOG(FATAL) << "Method warmup threshold is above its internal limit.";
-    }
   } else {
     jit_options->warmup_threshold_ = jit_options->compile_threshold_ / 2;
   }
+  jit_options->warmup_threshold_ = RoundUpThreshold(jit_options->warmup_threshold_);
 
   if (options.Exists(RuntimeArgumentMap::JITOsrThreshold)) {
     jit_options->osr_threshold_ = *options.Get(RuntimeArgumentMap::JITOsrThreshold);
-    if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
-      LOG(FATAL) << "Method on stack replacement threshold is above its internal limit.";
-    }
   } else {
     jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
     if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
-      jit_options->osr_threshold_ = std::numeric_limits<uint16_t>::max();
+      jit_options->osr_threshold_ =
+          RoundDown(std::numeric_limits<uint16_t>::max(), kJitSamplesBatchSize);
     }
   }
+  jit_options->osr_threshold_ = RoundUpThreshold(jit_options->osr_threshold_);
 
   if (options.Exists(RuntimeArgumentMap::JITPriorityThreadWeight)) {
     jit_options->priority_thread_weight_ =
@@ -149,10 +153,6 @@
   return jit_options;
 }
 
-bool Jit::ShouldUsePriorityThreadWeight(Thread* self) {
-  return self->IsJitSensitiveThread() && Runtime::Current()->InJankPerceptibleProcessState();
-}
-
 void Jit::DumpInfo(std::ostream& os) {
   code_cache_->Dump(os);
   cumulative_timings_.Dump(os);
@@ -639,20 +639,24 @@
   return false;
 }
 
-void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count, bool with_backedges) {
+bool Jit::MaybeCompileMethod(Thread* self,
+                             ArtMethod* method,
+                             uint32_t old_count,
+                             uint32_t new_count,
+                             bool with_backedges) {
   if (thread_pool_ == nullptr) {
     // Should only see this when shutting down, starting up, or in safe mode.
     DCHECK(Runtime::Current()->IsShuttingDown(self) ||
            !Runtime::Current()->IsFinishedStarting() ||
            Runtime::Current()->IsSafeMode());
-    return;
+    return false;
   }
   if (IgnoreSamplesForMethod(method)) {
-    return;
+    return false;
   }
   if (HotMethodThreshold() == 0) {
     // Tests might request JIT on first use (compiled synchronously in the interpreter).
-    return;
+    return false;
   }
   DCHECK(thread_pool_ != nullptr);
   DCHECK_GT(WarmMethodThreshold(), 0);
@@ -661,15 +665,9 @@
   DCHECK_GE(PriorityThreadWeight(), 1);
   DCHECK_LE(PriorityThreadWeight(), HotMethodThreshold());
 
-  uint16_t starting_count = method->GetCounter();
-  if (Jit::ShouldUsePriorityThreadWeight(self)) {
-    count *= PriorityThreadWeight();
-  }
-  uint32_t new_count = starting_count + count;
-  // Note: Native method have no "warm" state or profiling info.
-  if (LIKELY(!method->IsNative()) && starting_count < WarmMethodThreshold()) {
-    if ((new_count >= WarmMethodThreshold()) &&
-        (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
+  if (old_count < WarmMethodThreshold() && new_count >= WarmMethodThreshold()) {
+    // Note: Native methods have no "warm" state or profiling info.
+    if (!method->IsNative() && method->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
       bool success = ProfilingInfo::Create(self, method, /* retry_allocation= */ false);
       if (success) {
         VLOG(jit) << "Start profiling " << method->PrettyMethod();
@@ -679,7 +677,7 @@
         // Calling ProfilingInfo::Create might put us in a suspended state, which could
         // lead to the thread pool being deleted when we are shutting down.
         DCHECK(Runtime::Current()->IsShuttingDown(self));
-        return;
+        return false;
       }
 
       if (!success) {
@@ -689,32 +687,27 @@
             self, new JitCompileTask(method, JitCompileTask::TaskKind::kAllocateProfile));
       }
     }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, static_cast<uint32_t>(HotMethodThreshold() - 1));
-  } else if (UseJitCompilation()) {
-    if (starting_count < HotMethodThreshold()) {
-      if ((new_count >= HotMethodThreshold()) &&
-          !code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+  }
+  if (UseJitCompilation()) {
+    if (old_count < HotMethodThreshold() && new_count >= HotMethodThreshold()) {
+      if (!code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
         DCHECK(thread_pool_ != nullptr);
         thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile));
       }
-      // Avoid jumping more than one state at a time.
-      new_count = std::min(new_count, static_cast<uint32_t>(OSRMethodThreshold() - 1));
-    } else if (starting_count < OSRMethodThreshold()) {
+    }
+    if (old_count < OSRMethodThreshold() && new_count >= OSRMethodThreshold()) {
       if (!with_backedges) {
-        // If the samples don't contain any back edge, we don't increment the hotness.
-        return;
+        return false;
       }
       DCHECK(!method->IsNative());  // No back edges reported for native methods.
-      if ((new_count >= OSRMethodThreshold()) &&  !code_cache_->IsOsrCompiled(method)) {
+      if (!code_cache_->IsOsrCompiled(method)) {
         DCHECK(thread_pool_ != nullptr);
         thread_pool_->AddTask(
             self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompileOsr));
       }
     }
   }
-  // Update hotness counter
-  method->SetCounter(new_count);
+  return true;
 }
 
 class ScopedSetRuntimeThread {
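
As a rough illustration of the batched hotness counting that the jit.cc changes above introduce (every name below, e.g. kBatchSize, kWarmThreshold, CrossesThreshold, is a hypothetical stand-in, not ART's): thresholds are rounded up to whole batches, the counter only advances one batch at a time, and a transition fires exactly when an increment crosses its threshold.

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for the JIT thresholds and batch size.
    constexpr uint32_t kBatchSize = 32;       // Power of two, like kJitSamplesBatchSize.
    constexpr uint32_t kWarmThreshold = 128;
    constexpr uint32_t kHotThreshold = 256;

    // Round a threshold up to a whole number of batches (same intent as
    // JitOptions::RoundUpThreshold, but not the ART implementation).
    uint32_t RoundUpThreshold(uint32_t threshold) {
      if (threshold > kBatchSize) {
        threshold = (threshold + kBatchSize - 1) & ~(kBatchSize - 1);
      }
      return threshold;
    }

    // A transition fires exactly when one batch increment crosses its threshold.
    bool CrossesThreshold(uint32_t old_count, uint32_t new_count, uint32_t threshold) {
      return old_count < threshold && new_count >= threshold;
    }

    int main() {
      uint32_t counter = 0;
      while (counter < 512) {
        uint32_t old_count = counter;
        counter += kBatchSize;  // Samples are only accounted in whole batches.
        if (CrossesThreshold(old_count, counter, RoundUpThreshold(kWarmThreshold))) {
          std::cout << "warm (create ProfilingInfo) at " << counter << "\n";
        }
        if (CrossesThreshold(old_count, counter, RoundUpThreshold(kHotThreshold))) {
          std::cout << "hot (enqueue compile task) at " << counter << "\n";
        }
      }
      return 0;
    }
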
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e5c9766..714db3a 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -47,6 +47,7 @@
 // At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
 // See android/os/Process.java.
 static constexpr int kJitPoolThreadPthreadDefaultPriority = 9;
+static constexpr uint32_t kJitSamplesBatchSize = 32;  // Must be a power of 2.
 
 class JitOptions {
  public:
@@ -122,12 +123,16 @@
   }
 
  private:
+  // We add samples in batches of size kJitSamplesBatchSize.
+  // This method rounds the threshold so that it is a multiple of the batch size.
+  static uint32_t RoundUpThreshold(uint32_t threshold);
+
   bool use_jit_compilation_;
   size_t code_cache_initial_capacity_;
   size_t code_cache_max_capacity_;
-  uint16_t compile_threshold_;
-  uint16_t warmup_threshold_;
-  uint16_t osr_threshold_;
+  uint32_t compile_threshold_;
+  uint32_t warmup_threshold_;
+  uint32_t osr_threshold_;
   uint16_t priority_thread_weight_;
   uint16_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
@@ -154,7 +159,7 @@
   static constexpr size_t kDefaultPriorityThreadWeightRatio = 1000;
   static constexpr size_t kDefaultInvokeTransitionWeightRatio = 500;
   // How frequently should the interpreter check to see if OSR compilation is ready.
-  static constexpr int16_t kJitRecheckOSRThreshold = 100;
+  static constexpr int16_t kJitRecheckOSRThreshold = 101;  // Prime number to avoid patterns.
 
   virtual ~Jit();
 
@@ -218,7 +223,10 @@
   void MethodEntered(Thread* thread, ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
+  ALWAYS_INLINE void AddSamples(Thread* self,
+                                ArtMethod* method,
+                                uint16_t samples,
+                                bool with_backedges)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(ObjPtr<mirror::Object> this_object,
@@ -298,6 +306,15 @@
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
+  // Compile the method if the number of samples passes a threshold.
+  // Returns false if we cannot compile now; in that case, don't increment the counter and retry later.
+  bool MaybeCompileMethod(Thread* self,
+                          ArtMethod* method,
+                          uint32_t old_count,
+                          uint32_t new_count,
+                          bool with_backedges)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static bool BindCompilerMethods(std::string* error_msg);
 
   // JIT compiler
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7eac3d9..bd0e5a4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -34,6 +34,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
+#include <thread>
 #include <vector>
 
 #include "android-base/strings.h"
@@ -233,6 +234,7 @@
       class_linker_(nullptr),
       signal_catcher_(nullptr),
       java_vm_(nullptr),
+      thread_pool_ref_count_(0u),
       fault_message_(nullptr),
       threads_being_born_(0),
       shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)),
@@ -348,6 +350,8 @@
         << "\n";
   }
 
+  WaitForThreadPoolWorkersToStart();
+
   if (jit_ != nullptr) {
     // Wait for the workers to be created since there can't be any threads attaching during
     // shutdown.
@@ -400,6 +404,8 @@
     // JIT compiler threads.
     jit_->DeleteThreadPool();
   }
+  DeleteThreadPool();
+  CHECK(thread_pool_ == nullptr);
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
   GetRuntimeCallbacks()->StopDebugger();
@@ -930,6 +936,18 @@
 
   // Create the thread pools.
   heap_->CreateThreadPool();
+  {
+    ScopedTrace timing("CreateThreadPool");
+    constexpr size_t kStackSize = 64 * KB;
+    constexpr size_t kMaxRuntimeWorkers = 4u;
+    const size_t num_workers =
+        std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
+    MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+    CHECK(thread_pool_ == nullptr);
+    thread_pool_.reset(new ThreadPool("Runtime", num_workers, /*create_peers=*/false, kStackSize));
+    thread_pool_->StartWorkers(Thread::Current());
+  }
+
   // Reset the gc performance data at zygote fork so that the GCs
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
@@ -2658,4 +2676,45 @@
     GetClassLinker()->VisitClasses(&visitor);
   }
 }
+
+Runtime::ScopedThreadPoolUsage::ScopedThreadPoolUsage()
+    : thread_pool_(Runtime::Current()->AcquireThreadPool()) {}
+
+Runtime::ScopedThreadPoolUsage::~ScopedThreadPoolUsage() {
+  Runtime::Current()->ReleaseThreadPool();
+}
+
+bool Runtime::DeleteThreadPool() {
+  // Make sure workers are started to prevent thread shutdown errors.
+  WaitForThreadPoolWorkersToStart();
+  std::unique_ptr<ThreadPool> thread_pool;
+  {
+    MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+    if (thread_pool_ref_count_ == 0) {
+      thread_pool = std::move(thread_pool_);
+    }
+  }
+  return thread_pool != nullptr;
+}
+
+ThreadPool* Runtime::AcquireThreadPool() {
+  MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+  ++thread_pool_ref_count_;
+  return thread_pool_.get();
+}
+
+void Runtime::ReleaseThreadPool() {
+  MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+  CHECK_GT(thread_pool_ref_count_, 0u);
+  --thread_pool_ref_count_;
+}
+
+void Runtime::WaitForThreadPoolWorkersToStart() {
+  // Need to make sure workers are created before deleting the pool.
+  ScopedThreadPoolUsage stpu;
+  if (stpu.GetThreadPool() != nullptr) {
+    stpu.GetThreadPool()->WaitForWorkersToBeCreated();
+  }
+}
+
 }  // namespace art
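
The acquire/release/delete protocol added to runtime.cc above can be pictured with the following self-contained sketch; it uses a plain std::mutex in place of Locks::runtime_thread_pool_lock_ and a hypothetical Pool type in place of ThreadPool, and is not ART code.

    #include <cstddef>
    #include <memory>
    #include <mutex>

    struct Pool {};  // Hypothetical stand-in for the real ThreadPool.

    class RuntimeSketch {
     public:
      RuntimeSketch() : pool_(new Pool), ref_count_(0) {}

      // Atomically drop the pool, but only if nobody is currently using it.
      // Returns true if this call actually deleted the pool.
      bool DeletePool() {
        std::unique_ptr<Pool> local;
        {
          std::lock_guard<std::mutex> lock(mutex_);
          if (ref_count_ == 0) {
            local = std::move(pool_);
          }
        }
        return local != nullptr;  // Destroyed here, outside the lock.
      }

      // RAII guard that keeps the pool alive while in scope (cf. ScopedThreadPoolUsage).
      class ScopedUsage {
       public:
        explicit ScopedUsage(RuntimeSketch* runtime)
            : runtime_(runtime), pool_(runtime->Acquire()) {}
        ~ScopedUsage() { runtime_->Release(); }
        Pool* Get() const { return pool_; }  // May be null if already deleted.

       private:
        RuntimeSketch* const runtime_;
        Pool* const pool_;
      };

     private:
      Pool* Acquire() {
        std::lock_guard<std::mutex> lock(mutex_);
        ++ref_count_;
        return pool_.get();
      }

      void Release() {
        std::lock_guard<std::mutex> lock(mutex_);
        --ref_count_;
      }

      std::mutex mutex_;
      std::unique_ptr<Pool> pool_;
      size_t ref_count_;
    };

As in the patch, deletion simply fails while any scoped user is outstanding, and a user may observe a null pool once startup has finished and the pool has been deleted.
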
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 76cfcd1..a2d519d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -792,6 +792,28 @@
     return verifier_logging_threshold_ms_;
   }
 
+  // Atomically delete the thread pool if the reference count is 0. Returns true if deleted.
+  bool DeleteThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
+  // Wait for all the runtime thread pool workers to be attached.
+  void WaitForThreadPoolWorkersToStart() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
+  // Scoped usage of the runtime thread pool. Prevents the pool from being
+  // deleted. Note that the thread pool is only for startup and is deleted once startup completes.
+  class ScopedThreadPoolUsage {
+   public:
+    ScopedThreadPoolUsage();
+    ~ScopedThreadPoolUsage();
+
+    // Return the thread pool.
+    ThreadPool* GetThreadPool() const {
+      return thread_pool_;
+    }
+
+   private:
+    ThreadPool* const thread_pool_;
+  };
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -828,6 +850,9 @@
   //       friend).
   std::string GetFaultMessage();
 
+  ThreadPool* AcquireThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+  void ReleaseThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
@@ -911,6 +936,10 @@
   std::unique_ptr<jit::JitCodeCache> jit_code_cache_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
+  // Runtime thread pool. The pool is only for startup and is deleted once startup completes.
+  std::unique_ptr<ThreadPool> thread_pool_ GUARDED_BY(Locks::runtime_thread_pool_lock_);
+  size_t thread_pool_ref_count_ GUARDED_BY(Locks::runtime_thread_pool_lock_);
+
   // Fault message, printed when we get a SIGSEGV. Stored as a native-heap object and accessed
   // lock-free, so needs to be atomic.
   std::atomic<std::string*> fault_message_;
@@ -1115,6 +1144,7 @@
 
   // Note: See comments on GetFaultMessage.
   friend std::string GetFaultMessageForAbortLogging();
+  friend class ScopedThreadPoolUsage;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
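
A hypothetical caller of the new ScopedThreadPoolUsage API (the helper name and the inline fallback below are illustrative; only GetThreadPool(), ThreadPool::AddTask(Thread*, Task*) and Task::Run(Thread*) are existing interfaces) would look roughly like this:

    // Illustrative sketch; assumes the surrounding ART headers (runtime.h, thread_pool.h).
    void MaybeRunOnRuntimePool(Thread* self, Task* task) {
      Runtime::ScopedThreadPoolUsage stpu;
      ThreadPool* pool = stpu.GetThreadPool();
      if (pool == nullptr) {
        task->Run(self);  // Startup is over and the pool is gone; run the work inline.
        return;
      }
      // The pool cannot be deleted while stpu is alive, so the task is safely enqueued.
      pool->AddTask(self, task);
    }
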
diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc
index f2e5012..d08be72 100644
--- a/runtime/runtime_callbacks_test.cc
+++ b/runtime/runtime_callbacks_test.cc
@@ -147,6 +147,8 @@
   self->TransitionFromSuspendedToRunnable();
   bool started = runtime_->Start();
   ASSERT_TRUE(started);
+  // Make sure the workers are done starting so we don't get callbacks for them.
+  runtime_->WaitForThreadPoolWorkersToStart();
 
   cb_.state = CallbackState::kBase;  // Ignore main thread attach.
 
diff --git a/test/1919-vminit-thread-start-timing/src/art/Test1919.java b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
index 3d5c079..f6b770f 100644
--- a/test/1919-vminit-thread-start-timing/src/art/Test1919.java
+++ b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
@@ -21,10 +21,12 @@
 
   public static void run() {
     for (Event e : getEvents()) {
-      if (PRINT_ALL_THREADS ||
-          e.thr.equals(Thread.currentThread()) ||
-          e.thr.getName().equals("JVMTI_THREAD-Test1919")) {
-        System.out.println(e.name + ": " + e.thr.getName());
+      if (e.thr != null) {
+        if (PRINT_ALL_THREADS ||
+            e.thr.equals(Thread.currentThread()) ||
+            e.thr.getName().equals("JVMTI_THREAD-Test1919")) {
+          System.out.println(e.name + ": " + e.thr.getName());
+        }
       }
     }
   }
diff --git a/tools/bootjars.sh b/tools/bootjars.sh
index 9f22827..320d4b5 100755
--- a/tools/bootjars.sh
+++ b/tools/bootjars.sh
@@ -75,7 +75,7 @@
   # Note: This must start with the CORE_IMG_JARS in Android.common_path.mk
   # because that's what we use for compiling the core.art image.
   # It may contain additional modules from TEST_CORE_JARS.
-  core_jars_list="core-oj core-libart core-simple"
+  core_jars_list="core-oj core-libart core-simple okhttp bouncycastle apache-xml conscrypt"
   core_jars_suffix=
   if [[ $mode == target ]]; then
     core_jars_suffix=-testdex