Merge "Fix core jars list in bootjars.sh ."
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 0039be0..f52c566 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1231,8 +1231,15 @@
     bool operator()(ObjPtr<mirror::Class> klass) override REQUIRES_SHARED(Locks::mutator_lock_) {
       std::string temp;
       StringPiece name(klass->GetDescriptor(&temp));
-      if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
-        data_->image_classes_.push_back(hs_.NewHandle(klass));
+      auto it = data_->image_class_descriptors_->find(name);
+      if (it != data_->image_class_descriptors_->end()) {
+        if (LIKELY(klass->IsResolved())) {
+          data_->image_classes_.push_back(hs_.NewHandle(klass));
+        } else {
+          DCHECK(klass->IsErroneousUnresolved());
+          VLOG(compiler) << "Removing unresolved class from image classes: " << name;
+          data_->image_class_descriptors_->erase(it);
+        }
       } else {
         // Check whether it is initialized and has a clinit. They must be kept, too.
         if (klass->IsInitialized() && klass->FindClassInitializer(
diff --git a/libprofile/profile/profile_compilation_info.cc b/libprofile/profile/profile_compilation_info.cc
index 02f6344..9b32b9e 100644
--- a/libprofile/profile/profile_compilation_info.cc
+++ b/libprofile/profile/profile_compilation_info.cc
@@ -58,6 +58,12 @@
 // profile_compilation_info object. All the profile line headers are now placed together
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersionWithCounters[] = { '5', '0', '0', '\0' };
+
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersion) == 4,
+              "Invalid profile version size");
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersionWithCounters) == 4,
+              "Invalid profile version size");
 
 // The name of the profile entry in the dex metadata file.
 // DO NOT CHANGE THIS! (it's similar to classes.dex in the apk files).
@@ -84,18 +90,31 @@
   return kDebugIgnoreChecksum || dex_file_checksum == checksum;
 }
 
+// For storage efficiency we saturate aggregation counters at the maximum uint16_t value.
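+// E.g. IncrementAggregationCounter(65530, 10) saturates to 65535 instead of wrapping to 4.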
+static uint16_t IncrementAggregationCounter(uint16_t counter, uint16_t value) {
+  if (counter < (std::numeric_limits<uint16_t>::max() - value)) {
+    return counter + value;
+  } else {
+    return std::numeric_limits<uint16_t>::max();
+  }
+}
+
 ProfileCompilationInfo::ProfileCompilationInfo(ArenaPool* custom_arena_pool)
     : default_arena_pool_(),
       allocator_(custom_arena_pool),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::ProfileCompilationInfo()
     : default_arena_pool_(),
       allocator_(&default_arena_pool_),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::~ProfileCompilationInfo() {
@@ -326,13 +345,15 @@
 /**
  * Serialization format:
  * [profile_header, zipped[[profile_line_header1, profile_line_header2...],[profile_line_data1,
- *    profile_line_data2...]]]
+ *    profile_line_data2...]],global_aggregation_counter]
  * profile_header:
  *   magic,version,number_of_dex_files,uncompressed_size_of_zipped_data,compressed_data_size
  * profile_line_header:
  *   dex_location,number_of_classes,methods_region_size,dex_location_checksum,num_method_ids
  * profile_line_data:
- *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap
+ *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap,
+ *   num_classes,class_counters,num_methods,method_counters
+ * The aggregation counters are only stored if the profile version is kProfileVersionWithCounters.
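+ * Each counter is a uint16_t. The class_counters follow the same order as the class_ids,
+ * and the method_counters are written in increasing method index order with one entry per
+ * method present in the profile (hot, startup or post-startup).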
  * The method_encoding is:
  *    method_id,number_of_inline_caches,inline_cache1,inline_cache2...
  * The inline_cache is:
@@ -355,7 +376,7 @@
   if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic))) {
     return false;
   }
-  if (!WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+  if (!WriteBuffer(fd, version_, sizeof(version_))) {
     return false;
   }
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
@@ -370,7 +391,17 @@
         sizeof(uint16_t) * dex_data.class_set.size() +
         methods_region_size +
         dex_data.bitmap_storage.size();
+    if (StoresAggregationCounters()) {
+      required_capacity += sizeof(uint16_t) +  // num class counters
+          sizeof(uint16_t) * dex_data.class_set.size() +
+          sizeof(uint16_t) +  // num method counters
+          sizeof(uint16_t) * dex_data_ptr->GetNumMethodCounters();
+    }
   }
+  if (StoresAggregationCounters()) {
+    required_capacity += sizeof(uint16_t);  // global counter
+  }
+
   // Allow large profiles for non target builds for the case where we are merging many profiles
   // to generate a boot image profile.
   if (kIsTargetBuild && required_capacity > kProfileSizeErrorThresholdInBytes) {
@@ -443,6 +474,24 @@
     buffer.insert(buffer.end(),
                   dex_data.bitmap_storage.begin(),
                   dex_data.bitmap_storage.end());
+
+    if (StoresAggregationCounters()) {
+      AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+      for (const auto& class_id : dex_data.class_set) {
+        uint16_t type_idx = class_id.index_;
+        AddUintToBuffer(&buffer, dex_data.class_counters[type_idx]);
+      }
+      AddUintToBuffer(&buffer, dex_data.GetNumMethodCounters());
+      for (uint16_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+        if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+          AddUintToBuffer(&buffer, dex_data.method_counters[method_idx]);
+        }
+      }
+    }
+  }
+
+  if (StoresAggregationCounters()) {
+    AddUintToBuffer(&buffer, aggregation_count_);
   }
 
   uint32_t output_size = 0;
@@ -583,7 +632,8 @@
         profile_key,
         checksum,
         profile_index,
-        num_method_ids);
+        num_method_ids,
+        StoresAggregationCounters());
     info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
@@ -943,7 +993,7 @@
   // Read magic and version
   const size_t kMagicVersionSize =
     sizeof(kProfileMagic) +
-    sizeof(kProfileVersion) +
+    kProfileVersionSize +
     sizeof(uint8_t) +  // number of dex files
     sizeof(uint32_t) +  // size of uncompressed profile data
     sizeof(uint32_t);  // size of compressed profile data
@@ -959,10 +1009,18 @@
     *error = "Profile missing magic";
     return kProfileLoadVersionMismatch;
   }
-  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+  if (safe_buffer.CountUnreadBytes() < kProfileVersionSize) {
+     *error = "Cannot read profile version";
+     return kProfileLoadBadData;
+  }
+  memcpy(version_, safe_buffer.GetCurrentPtr(), kProfileVersionSize);
+  safe_buffer.Advance(kProfileVersionSize);
+  if ((memcmp(version_, kProfileVersion, kProfileVersionSize) != 0) &&
+      (memcmp(version_, kProfileVersionWithCounters, kProfileVersionSize) != 0)) {
     *error = "Profile version mismatch";
     return kProfileLoadVersionMismatch;
   }
+
   if (!safe_buffer.ReadUintAndAdvance<uint8_t>(number_of_dex_files)) {
     *error = "Cannot read the number of dex files";
     return kProfileLoadBadData;
@@ -1047,6 +1105,7 @@
     }
   }
 
+  // Read method bitmap.
   const size_t bytes = data->bitmap_storage.size();
   if (buffer.CountUnreadBytes() < bytes) {
     *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
@@ -1055,10 +1114,51 @@
   const uint8_t* base_ptr = buffer.GetCurrentPtr();
   std::copy_n(base_ptr, bytes, data->bitmap_storage.data());
   buffer.Advance(bytes);
-  // Read method bitmap.
+
+  if (StoresAggregationCounters()) {
+    if (!ReadAggregationCounters(buffer, *data, error)) {
+      return kProfileLoadBadData;
+    }
+  }
+
   return kProfileLoadSuccess;
 }
 
+bool ProfileCompilationInfo::ReadAggregationCounters(
+      SafeBuffer& buffer,
+      DexFileData& dex_data,
+      /*out*/std::string* error) {
+  size_t unread_bytes_before_op = buffer.CountUnreadBytes();
+  size_t expected_byte_count = sizeof(uint16_t) *
+      (dex_data.class_set.size() + dex_data.method_map.size() + 2);
+  if (unread_bytes_before_op < expected_byte_count) {
+    *error += "Profile EOF reached prematurely for ReadAggregationCounters";
+    return false;
+  }
+
+  uint16_t num_class_counters;
+  READ_UINT(uint16_t, buffer, num_class_counters, error);
+  if (num_class_counters != dex_data.class_set.size()) {
+    *error = "Invalid class size when reading counters";
+    return false;
+  }
+  for (const auto& class_it : dex_data.class_set) {
+    READ_UINT(uint16_t, buffer, dex_data.class_counters[class_it.index_], error);
+  }
+
+  uint16_t num_method_counters;
+  READ_UINT(uint16_t, buffer, num_method_counters, error);
+  if (num_method_counters != dex_data.GetNumMethodCounters()) {
+    *error = "Invalid class size when reading counters";
+    return false;
+  }
+  for (uint16_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+    if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+      READ_UINT(uint16_t, buffer, dex_data.method_counters[method_idx], error);
+    }
+  }
+
+  return true;
+}
+
 // TODO(calin): Fix this API. ProfileCompilationInfo::Load should be static and
 // return a unique pointer to a ProfileCompilationInfo upon success.
 bool ProfileCompilationInfo::Load(
@@ -1370,9 +1470,17 @@
     }
   }
 
+  if (StoresAggregationCounters()) {
+    if (!uncompressed_data.ReadUintAndAdvance<uint16_t>(&aggregation_count_)) {
+      *error = "Cannot read the global aggregation count";
+      return kProfileLoadBadData;
+    }
+  }
+
   // Check that we read everything and that profiles don't contain junk data.
   if (uncompressed_data.CountUnreadBytes() > 0) {
-    *error = "Unexpected content in the profile file";
+    *error = "Unexpected content in the profile file: " +
+        std::to_string(uncompressed_data.CountUnreadBytes()) + " extra bytes";
     return kProfileLoadBadData;
   } else {
     return kProfileLoadSuccess;
@@ -1518,6 +1626,33 @@
                                                                  other_dex_data->checksum));
     DCHECK(dex_data != nullptr);
 
+    // Merge counters for methods and classes. Must be done before we merge the bitmaps so that
+    // we can tell if the data is new or not.
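+    // When the other profile does not store counters, an entry contributes 1 only if it is
+    // already present in this profile; the counter thus tracks how many merged profiles
+    // re-observed a class/method that this profile already knew about.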
+    if (StoresAggregationCounters()) {
+      // Class aggregation counters.
+      if (merge_classes) {
+        for (const dex::TypeIndex& type_idx : other_dex_data->class_set) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->class_counters[type_idx.index_]
+              : (dex_data->ContainsClass(type_idx) ? 1 : 0);
+
+          dex_data->class_counters[type_idx.index_] =
+              IncrementAggregationCounter(dex_data->class_counters[type_idx.index_], amount);
+        }
+      }
+
+      // Method aggregation counters.
+      for (uint16_t method_idx = 0; method_idx < other_dex_data->num_method_ids; method_idx++) {
+        if (other_dex_data->GetHotnessInfo(method_idx).IsInProfile()) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->method_counters[method_idx]
+              : (dex_data->GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0);
+          dex_data->method_counters[method_idx] =
+              IncrementAggregationCounter(dex_data->method_counters[method_idx], amount);
+        }
+      }
+    }
+
     // Merge the classes.
     if (merge_classes) {
       dex_data->class_set.insert(other_dex_data->class_set.begin(),
@@ -1552,6 +1687,13 @@
     // Merge the method bitmaps.
     dex_data->MergeBitmap(*other_dex_data);
   }
+
+  // Global aggregation counter.
+  if (StoresAggregationCounters()) {
+    uint16_t amount = other.StoresAggregationCounters() ? other.aggregation_count_ : 1;
+    aggregation_count_ = IncrementAggregationCounter(aggregation_count_, amount);
+  }
+
   return true;
 }
 
@@ -1614,11 +1756,7 @@
 
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
   const DexFileData* dex_data = FindDexData(&dex_file);
-  if (dex_data != nullptr) {
-    const ArenaSet<dex::TypeIndex>& classes = dex_data->class_set;
-    return classes.find(type_idx) != classes.end();
-  }
-  return false;
+  return (dex_data != nullptr) && dex_data->ContainsClass(type_idx);
 }
 
 uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
@@ -1753,6 +1891,9 @@
 bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
   // No need to compare profile_key_map_. That's only a cache for fast search.
   // All the information is already in the info_ vector.
+  if (memcmp(version_, other.version_, kProfileVersionSize) != 0) {
+    return false;
+  }
   if (info_.size() != other.info_.size()) {
     return false;
   }
@@ -1763,6 +1904,9 @@
       return false;
     }
   }
+  if (aggregation_count_ != other.aggregation_count_) {
+    return false;
+  }
   return true;
 }
 
@@ -1965,9 +2109,8 @@
   SetMethodHotness(index, flags);
 
   if ((flags & MethodHotness::kFlagHot) != 0) {
-    method_map.FindOrAdd(
-        index,
-        InlineCacheMap(std::less<uint16_t>(), allocator_->Adapter(kArenaAllocProfile)));
+    ProfileCompilationInfo::InlineCacheMap* result = FindOrAddMethod(index);
+    DCHECK(result != nullptr);
   }
   return true;
 }
@@ -2000,6 +2143,43 @@
   return ret;
 }
 
+int32_t ProfileCompilationInfo::DexFileData::GetMethodAggregationCounter(
+      uint16_t method_idx) const {
+  CHECK_GT(method_counters.size(), method_idx) << "Profile not prepared for aggregation counters";
+  if (!GetHotnessInfo(method_idx).IsInProfile()) {
+    return -1;
+  }
+
+  return method_counters[method_idx];
+}
+
+int32_t ProfileCompilationInfo::DexFileData::GetClassAggregationCounter(uint16_t type_idx) const {
+  CHECK_GT(class_counters.size(), type_idx) << "Profile not prepared for aggregation counters";
+  if (!ContainsClass(dex::TypeIndex(type_idx))) {
+    return -1;
+  }
+
+  return class_counters[type_idx];
+}
+
+int32_t ProfileCompilationInfo::GetMethodAggregationCounter(
+      const MethodReference& method_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(method_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetMethodAggregationCounter(method_ref.index);
+}
+
+int32_t ProfileCompilationInfo::GetClassAggregationCounter(const TypeReference& type_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(type_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetClassAggregationCounter(type_ref.index);
+}
+
+uint16_t ProfileCompilationInfo::GetAggregationCounter() const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  return aggregation_count_;
+}
+
 ProfileCompilationInfo::DexPcData*
 ProfileCompilationInfo::FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc) {
   return &(inline_cache->FindOrAdd(dex_pc, DexPcData(&allocator_))->second);
@@ -2096,4 +2276,46 @@
   profile_key_map_.clear();
 }
 
+bool ProfileCompilationInfo::StoresAggregationCounters() const {
+  return memcmp(version_, kProfileVersionWithCounters, sizeof(kProfileVersionWithCounters)) == 0;
+}
+
+void ProfileCompilationInfo::PrepareForAggregationCounters() {
+  InitProfileVersionInternal(kProfileVersionWithCounters);
+  for (DexFileData* dex_data : info_) {
+    dex_data->PrepareForAggregationCounters();
+  }
+}
+
+void ProfileCompilationInfo::DexFileData::PrepareForAggregationCounters() {
+  method_counters.resize(num_method_ids);
+  // TODO(calin): we should store the maximum number of types in the profile.
+  // It will simplify quite a few things and make this storage allocation
+  // more efficient.
+  size_t max_elems = 1 << (kBitsPerByte * sizeof(uint16_t));
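+  // With uint16_t counters this reserves 64K entries (~128 KiB) per dex file for classes
+  // alone, which is why aggregation counters are opt-in via PrepareForAggregationCounters().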
+  class_counters.resize(max_elems);
+}
+
+const uint8_t* ProfileCompilationInfo::GetVersion() const {
+  return version_;
+}
+
+void ProfileCompilationInfo::InitProfileVersionInternal(const uint8_t version[]) {
+  CHECK(
+      (memcmp(version, kProfileVersion, kProfileVersionSize) == 0) ||
+      (memcmp(version, kProfileVersionWithCounters, kProfileVersionSize) == 0));
+  memcpy(version_, version, kProfileVersionSize);
+}
+
+uint16_t ProfileCompilationInfo::DexFileData::GetNumMethodCounters() const {
+  uint16_t num_method_counters = 0;
+  for (uint16_t method_idx = 0; method_idx < num_method_ids; method_idx++) {
+    num_method_counters += GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0;
+  }
+  return num_method_counters;
+}
+
+bool ProfileCompilationInfo::DexFileData::ContainsClass(const dex::TypeIndex type_index) const {
+  return class_set.find(type_index) != class_set.end();
+}
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info.h b/libprofile/profile/profile_compilation_info.h
index 92fa098..fa4615b 100644
--- a/libprofile/profile/profile_compilation_info.h
+++ b/libprofile/profile/profile_compilation_info.h
@@ -73,9 +73,10 @@
  public:
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
-
+  static const uint8_t kProfileVersionWithCounters[];
   static const char kDexMetadataProfileEntry[];
 
+  static constexpr size_t kProfileVersionSize = 4;
   static constexpr uint8_t kIndividualInlineCacheSize = 5;
 
   // Data structures for encoding the offline representation of inline caches.
@@ -447,6 +448,30 @@
   // Clears all the data from the profile.
   void ClearData();
 
+  // Prepare the profile to store aggregation counters.
+  // This will change the profile version and allocate extra storage for the counters.
+  // It allocates 2 bytes for every possible method and class, so do not use it in
+  // performance-critical code which needs to be memory efficient.
+  void PrepareForAggregationCounters();
+
+  // Returns true if the profile is configured to store aggregation counters.
+  bool StoresAggregationCounters() const;
+
+  // Returns the aggregation counter for the given method.
+  // Returns -1 if the method is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetMethodAggregationCounter(const MethodReference& method_ref) const;
+  // Returns the aggregation counter for the given class.
+  // Returns -1 if the class is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetClassAggregationCounter(const TypeReference& type_ref) const;
+  // Returns the number of times the profile was merged.
+  // CHECKs that the profile is configured to store aggregation counters.
+  uint16_t GetAggregationCounter() const;
+
+  // Return the version of this profile.
+  const uint8_t* GetVersion() const;
+
  private:
   enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
@@ -470,7 +495,8 @@
                 const std::string& key,
                 uint32_t location_checksum,
                 uint16_t index,
-                uint32_t num_methods)
+                uint32_t num_methods,
+                bool store_aggregation_counters)
         : allocator_(allocator),
           profile_key(key),
           profile_index(index),
@@ -478,13 +504,18 @@
           method_map(std::less<uint16_t>(), allocator->Adapter(kArenaAllocProfile)),
           class_set(std::less<dex::TypeIndex>(), allocator->Adapter(kArenaAllocProfile)),
           num_method_ids(num_methods),
-          bitmap_storage(allocator->Adapter(kArenaAllocProfile)) {
+          bitmap_storage(allocator->Adapter(kArenaAllocProfile)),
+          method_counters(allocator->Adapter(kArenaAllocProfile)),
+          class_counters(allocator->Adapter(kArenaAllocProfile)) {
       bitmap_storage.resize(ComputeBitmapStorage(num_method_ids));
       if (!bitmap_storage.empty()) {
         method_bitmap =
             BitMemoryRegion(MemoryRegion(
                 &bitmap_storage[0], bitmap_storage.size()), 0, ComputeBitmapBits(num_method_ids));
       }
+      if (store_aggregation_counters) {
+        PrepareForAggregationCounters();
+      }
     }
 
     static size_t ComputeBitmapBits(uint32_t num_method_ids) {
@@ -495,7 +526,13 @@
     }
 
     bool operator==(const DexFileData& other) const {
-      return checksum == other.checksum && method_map == other.method_map;
+      return checksum == other.checksum &&
+          num_method_ids == other.num_method_ids &&
+          method_map == other.method_map &&
+          class_set == other.class_set &&
+          (BitMemoryRegion::Compare(method_bitmap, other.method_bitmap) == 0) &&
+          class_counters == other.class_counters &&
+          method_counters == other.method_counters;
     }
 
     // Mark a method as executed at least once.
@@ -510,6 +547,14 @@
 
     void SetMethodHotness(size_t index, MethodHotness::Flag flags);
     MethodHotness GetHotnessInfo(uint32_t dex_method_index) const;
+    void PrepareForAggregationCounters();
+
+    int32_t GetMethodAggregationCounter(uint16_t method_index) const;
+    int32_t GetClassAggregationCounter(uint16_t type_index) const;
+
+    uint16_t GetNumMethodCounters() const;
+
+    bool ContainsClass(const dex::TypeIndex type_index) const;
 
     // The allocator used to allocate new inline cache maps.
     ArenaAllocator* const allocator_;
@@ -519,7 +564,7 @@
     uint8_t profile_index;
     // The dex checksum.
     uint32_t checksum;
-    // The methonds' profile information.
+    // The methods' profile information.
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
@@ -531,6 +576,8 @@
     uint32_t num_method_ids;
     ArenaVector<uint8_t> bitmap_storage;
     BitMemoryRegion method_bitmap;
+    ArenaVector<uint16_t> method_counters;
+    ArenaVector<uint16_t> class_counters;
 
    private:
     enum BitmapIndex {
@@ -761,6 +808,11 @@
                    const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
                    /*out*/std::string* error);
 
+  // Read the aggregation counters from the buffer.
+  bool ReadAggregationCounters(SafeBuffer& buffer,
+                               DexFileData& dex_data,
+                               /*out*/std::string* error);
+
   // The method generates mapping of profile indices while merging a new profile
   // data into current data. It returns true, if the mapping was successful.
   bool RemapProfileIndex(const std::vector<ProfileLineHeader>& profile_line_headers,
@@ -792,6 +844,9 @@
   // if no previous data exists.
   DexPcData* FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc);
 
+  // Initializes the profile version to the desired one.
+  void InitProfileVersionInternal(const uint8_t version[]);
+
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
@@ -809,6 +864,14 @@
   // This is used to speed up searches since it avoids iterating
   // over the info_ vector when searching by profile key.
   ArenaSafeMap<const std::string, uint8_t> profile_key_map_;
+
+  // The version of the profile.
+  // This may change if a "normal" profile is transformed to keep track
+  // of aggregation counters.
+  uint8_t version_[kProfileVersionSize];
+
+  // Stored only when the profile is configured to keep track of aggregation counters.
+  uint16_t aggregation_count_;
 };
 
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info_test.cc b/libprofile/profile/profile_compilation_info_test.cc
index a2bfe50..47019c4 100644
--- a/libprofile/profile/profile_compilation_info_test.cc
+++ b/libprofile/profile/profile_compilation_info_test.cc
@@ -1141,4 +1141,180 @@
   ASSERT_TRUE(loaded_info.Equals(info));
 }
 
+TEST_F(ProfileCompilationInfoTest, PrepareForAggregationCounters) {
+  ProfileCompilationInfo info;
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersion,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+
+  info.PrepareForAggregationCounters();
+
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersionWithCounters,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+  ASSERT_TRUE(info.StoresAggregationCounters());
+  ASSERT_EQ(info.GetAggregationCounter(), 0);
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  ASSERT_EQ(info1.GetAggregationCounter(), 0);
+  info1.MergeWith(info2);
+  ASSERT_EQ(info1.GetAggregationCounter(), 1);
+  info1.MergeWith(info3);
+  ASSERT_EQ(info1.GetAggregationCounter(), 2);
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+
+  // Check methods and classes that do not exist.
+  ASSERT_EQ(-1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 4)));
+  ASSERT_EQ(-1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(40))));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveAndLoadAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info3);
+
+  ScratchFile profile;
+
+  ASSERT_TRUE(info1.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(info1));
+
+  ASSERT_EQ(2, loaded_info.GetAggregationCounter());
+
+  ASSERT_EQ(0, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeTwoWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+  AddClass(location, checksum, dex::TypeIndex(10), &info2);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info2);
+  ASSERT_EQ(2, info1.GetAggregationCounter());
+
+  // Save and load the profile to create a copy of the data
+  ScratchFile profile;
+  info1.Save(GetFd(profile));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  profile.GetFile()->ResetOffset();
+  loaded_info.Load(GetFd(profile));
+
+  // Merge the data
+  info1.MergeWith(loaded_info);
+
+  ASSERT_EQ(4, info1.GetAggregationCounter());
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(4, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+
+  ASSERT_EQ(4, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+}
+
 }  // namespace art
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index 4dc5262..b65bb43 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -32,7 +32,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
         const ScopedFlock& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK(!profile_files.empty());
 
   ProfileCompilationInfo info;
@@ -42,6 +43,12 @@
     return kErrorBadProfiles;
   }
 
+  // If we need to store aggregation counters (e.g. for the boot image profile),
+  // prepare the reference profile now.
+  if (store_aggregation_counters) {
+    info.PrepareForAggregationCounters();
+  }
+
   // Store the current state of the reference profile before merging with the current profiles.
   uint32_t number_of_methods = info.GetNumberOfMethods();
   uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
@@ -124,7 +131,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<int>& profile_files_fd,
         int reference_profile_file_fd,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK_GE(reference_profile_file_fd, 0);
 
   std::string error;
@@ -147,13 +155,15 @@
 
   return ProcessProfilesInternal(profile_files.Get(),
                                  reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<std::string>& profile_files,
         const std::string& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   std::string error;
 
   ScopedFlockList profile_files_list(profile_files.size());
@@ -171,7 +181,8 @@
 
   return ProcessProfilesInternal(profile_files_list.Get(),
                                  locked_reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 }  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
index c1d6f8e..45d4e38 100644
--- a/profman/profile_assistant.h
+++ b/profman/profile_assistant.h
@@ -55,19 +55,22 @@
       const std::vector<std::string>& profile_files,
       const std::string& reference_profile_file,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
   static ProcessingResult ProcessProfiles(
       const std::vector<int>& profile_files_fd_,
       int reference_profile_file_fd,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
  private:
   static ProcessingResult ProcessProfilesInternal(
       const std::vector<ScopedFlock>& profile_files,
       const ScopedFlock& reference_profile_file,
-      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn);
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+      bool store_aggregation_counters);
 
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index e9d3290..e906151 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -102,7 +102,7 @@
       }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1,
+      ASSERT_TRUE(info->AddClassIndex(ProfileCompilationInfo::GetProfileDexFileKey(dex_location1),
                                       dex_location_checksum1,
                                       dex::TypeIndex(i),
                                       number_of_methods1));
@@ -1300,4 +1300,57 @@
   }
 }
 
+TEST_F(ProfileAssistantTest, MergeProfilesWithCounters) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  // The new profile info will contain methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  const uint16_t kNumberOfClasses = 50;
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+  const DexFile& d1 = *dex_files[0];
+  const DexFile& d2 = *dex_files[1];
+  ProfileCompilationInfo info1;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile2, &info2);
+
+  std::string profman_cmd = GetProfmanCmd();
+  std::vector<std::string> argv_str;
+  argv_str.push_back(profman_cmd);
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile1.GetFd()));
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile2.GetFd()));
+  argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile.GetFd()));
+  argv_str.push_back("--store-aggregation-counters");
+  std::string error;
+
+  EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0) << error;
+
+  // Verify that we can load the result and that the counters are in place.
+
+  ProfileCompilationInfo result;
+  result.PrepareForAggregationCounters();
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile.GetFd()));
+
+  ASSERT_TRUE(result.StoresAggregationCounters());
+  ASSERT_EQ(2, result.GetAggregationCounter());
+
+  for (uint16_t i = 0; i < kNumberOfMethodsToEnableCompilation; i++) {
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d1, i)));
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d2, i)));
+  }
+  for (uint16_t i = 0; i < kNumberOfClasses; i++) {
+    ASSERT_EQ(1, result.GetClassAggregationCounter(TypeReference(&d1, dex::TypeIndex(i))));
+  }
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 2935a05..a0c387d 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -157,6 +157,9 @@
   UsageError("      the file passed with --profile-fd(file) to the profile passed with");
   UsageError("      --reference-profile-fd(file) and update at the same time the profile-key");
   UsageError("      of entries corresponding to the apks passed with --apk(-fd).");
+  UsageError("  --store-aggregation-counters: if present, profman will compute and store");
+  UsageError("      the aggregation counters of classes and methods in the output profile.");
+  UsageError("      In this case the profile will have a different version.");
   UsageError("");
 
   exit(EXIT_FAILURE);
@@ -200,7 +203,8 @@
       test_profile_class_percentage_(kDefaultTestProfileClassPercentage),
       test_profile_seed_(NanoTime()),
       start_ns_(NanoTime()),
-      copy_and_update_profile_key_(false) {}
+      copy_and_update_profile_key_(false),
+      store_aggregation_counters_(false) {}
 
   ~ProfMan() {
     LogCompletionTime();
@@ -287,6 +291,8 @@
         ParseUintOption(option, "--generate-test-profile-seed", &test_profile_seed_, Usage);
       } else if (option.starts_with("--copy-and-update-profile-key")) {
         copy_and_update_profile_key_ = true;
+      } else if (option.starts_with("--store-aggregation-counters")) {
+        store_aggregation_counters_ = true;
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -363,12 +369,14 @@
       File file(reference_profile_file_fd_, false);
       result = ProfileAssistant::ProcessProfiles(profile_files_fd_,
                                                  reference_profile_file_fd_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
       CloseAllFds(profile_files_fd_, "profile_files_fd_");
     } else {
       result = ProfileAssistant::ProcessProfiles(profile_files_,
                                                  reference_profile_file_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
     }
     return result;
   }
@@ -1279,6 +1287,7 @@
   uint32_t test_profile_seed_;
   uint64_t start_ns_;
   bool copy_and_update_profile_key_;
+  bool store_aggregation_counters_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.
diff --git a/runtime/base/locks.cc b/runtime/base/locks.cc
index cfc9f1d..a7922a2 100644
--- a/runtime/base/locks.cc
+++ b/runtime/base/locks.cc
@@ -61,6 +61,7 @@
 Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::runtime_thread_pool_lock_ = nullptr;
 Mutex* Locks::cha_lock_ = nullptr;
 Mutex* Locks::subtype_check_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
@@ -154,6 +155,7 @@
     DCHECK(user_code_suspension_lock_ != nullptr);
     DCHECK(dex_lock_ != nullptr);
     DCHECK(native_debug_interface_lock_ != nullptr);
+    DCHECK(runtime_thread_pool_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -189,6 +191,10 @@
     DCHECK(runtime_shutdown_lock_ == nullptr);
     runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kRuntimeThreadPoolLock);
+    DCHECK(runtime_thread_pool_lock_ == nullptr);
+    runtime_thread_pool_lock_ = new Mutex("runtime thread pool lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
     DCHECK(profiler_lock_ == nullptr);
     profiler_lock_ = new Mutex("profiler lock", current_lock_level);
diff --git a/runtime/base/locks.h b/runtime/base/locks.h
index 8cbe372..57719f1 100644
--- a/runtime/base/locks.h
+++ b/runtime/base/locks.h
@@ -117,6 +117,7 @@
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
+  kRuntimeThreadPoolLock,
   kRuntimeShutdownLock,
   kTraceLock,
   kHeapBitmapLock,
@@ -224,8 +225,11 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
+  // Guards the runtime (startup) thread pool and its reference count.
+  static Mutex* runtime_thread_pool_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
   // Guards background profiler global state.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_thread_pool_lock_);
 
   // Guards trace (ie traceview) requests.
   static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bf8aaae..8f9967f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1416,6 +1416,11 @@
   TrimSpaces(self);
   // Trim arenas that may have been used by JIT or verifier.
   runtime->GetArenaPool()->TrimMaps();
+  {
+    // TODO: Move this to a callback called when startup is finished (b/120671223).
+    ScopedTrace trace2("Delete thread pool");
+    runtime->DeleteThreadPool();
+  }
 }
 
 class TrimIndirectReferenceTableClosure : public Closure {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 66db063..4f9b3f9 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -21,7 +21,6 @@
 #include <unistd.h>
 
 #include <random>
-#include <thread>
 
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
@@ -685,40 +684,12 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     TimingLogger logger(__PRETTY_FUNCTION__, /*precise=*/ true, VLOG_IS_ON(image));
 
-    std::unique_ptr<ThreadPool> thread_pool;
     std::unique_ptr<ImageSpace> space = Init(image_filename,
                                              image_location,
                                              oat_file,
                                              &logger,
-                                             &thread_pool,
                                              image_reservation,
                                              error_msg);
-    if (thread_pool != nullptr) {
-      // Delay the thread pool deletion to prevent the deletion slowing down the startup by causing
-      // preemption. TODO: Just do this in heap trim.
-      static constexpr uint64_t kThreadPoolDeleteDelay = MsToNs(5000);
-
-      class DeleteThreadPoolTask : public HeapTask {
-       public:
-        explicit DeleteThreadPoolTask(std::unique_ptr<ThreadPool>&& thread_pool)
-            : HeapTask(NanoTime() + kThreadPoolDeleteDelay), thread_pool_(std::move(thread_pool)) {}
-
-        void Run(Thread* self) override {
-          ScopedTrace trace("DestroyThreadPool");
-          ScopedThreadStateChange stsc(self, kNative);
-          thread_pool_.reset();
-        }
-
-       private:
-        std::unique_ptr<ThreadPool> thread_pool_;
-      };
-      gc::TaskProcessor* const processor = Runtime::Current()->GetHeap()->GetTaskProcessor();
-      // The thread pool is already done being used since Init has finished running. Deleting the
-      // thread pool is done async since it takes a non-trivial amount of time to do.
-      if (processor != nullptr) {
-        processor->AddTask(Thread::Current(), new DeleteThreadPoolTask(std::move(thread_pool)));
-      }
-    }
     if (space != nullptr) {
       uint32_t expected_reservation_size =
           RoundUp(space->GetImageHeader().GetImageSize(), kPageSize);
@@ -779,7 +750,6 @@
                                           const char* image_location,
                                           const OatFile* oat_file,
                                           TimingLogger* logger,
-                                          std::unique_ptr<ThreadPool>* thread_pool,
                                           /*inout*/MemMap* image_reservation,
                                           /*out*/std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -856,18 +826,6 @@
       return nullptr;
     }
 
-    const size_t kMinBlocks = 2;
-    if (thread_pool != nullptr && image_header->GetBlockCount() >= kMinBlocks) {
-      TimingLogger::ScopedTiming timing("CreateThreadPool", logger);
-      ScopedThreadStateChange stsc(Thread::Current(), kNative);
-      constexpr size_t kStackSize = 64 * KB;
-      constexpr size_t kMaxRuntimeWorkers = 4u;
-      const size_t num_workers =
-          std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
-      thread_pool->reset(new ThreadPool("Image", num_workers, /*create_peers=*/false, kStackSize));
-      thread_pool->get()->StartWorkers(Thread::Current());
-    }
-
     // GetImageBegin is the preferred address to map the image. If we manage to map the
     // image at the image begin, the amount of fixup work required is minimized.
     // If it is pic we will retry with error_msg for the failure case. Pass a null error_msg to
@@ -880,7 +838,6 @@
         *image_header,
         file->Fd(),
         logger,
-        thread_pool != nullptr ? thread_pool->get() : nullptr,
         image_reservation,
         error_msg);
     if (!map.IsValid()) {
@@ -971,7 +928,6 @@
                               const ImageHeader& image_header,
                               int fd,
                               TimingLogger* logger,
-                              ThreadPool* pool,
                               /*inout*/MemMap* image_reservation,
                               /*out*/std::string* error_msg) {
     TimingLogger::ScopedTiming timing("MapImageFile", logger);
@@ -1015,9 +971,12 @@
       }
       memcpy(map.Begin(), &image_header, sizeof(ImageHeader));
 
+      Runtime::ScopedThreadPoolUsage stpu;
+      ThreadPool* const pool = stpu.GetThreadPool();
       const uint64_t start = NanoTime();
       Thread* const self = Thread::Current();
-      const bool use_parallel = pool != nullptr;
+      static constexpr size_t kMinBlocks = 2u;
+      const bool use_parallel = pool != nullptr && image_header.GetBlockCount() >= kMinBlocks;
       for (const ImageHeader::Block& block : image_header.GetBlocks(temp_map.Begin())) {
         auto function = [&](Thread*) {
           const uint64_t start2 = NanoTime();
@@ -1915,7 +1874,6 @@
                         image_location.c_str(),
                         /*oat_file=*/ nullptr,
                         logger,
-                        /*thread_pool=*/ nullptr,
                         image_reservation,
                         error_msg);
   }
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 2e41a9d..067c1fa 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -253,13 +253,6 @@
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
 
-  // Check that we are using the right interpreter.
-  if (kIsDebugBuild && self->UseMterp() != CanUseMterp()) {
-    // The flag might be currently being updated on all threads. Retry with lock.
-    MutexLock tll_mu(self, *Locks::thread_list_lock_);
-    DCHECK_EQ(self->UseMterp(), CanUseMterp());
-  }
-
   if (LIKELY(!from_deoptimize)) {  // Entering the method, but not via deoptimization.
     if (kIsDebugBuild) {
       CHECK_EQ(shadow_frame.GetDexPC(), 0u);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index a633a63..62f5d91 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -140,8 +140,10 @@
                                    uint16_t inst_data,
                                    JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(self->UseMterp(), CanUseMterp());
   // Make sure to check for async exceptions before anything else.
-  if (is_mterp && self->UseMterp()) {
+  if (is_mterp) {
+    DCHECK(self->UseMterp());
     DCHECK(!self->ObserveAsyncException());
   } else if (UNLIKELY(self->ObserveAsyncException())) {
     return false;
@@ -219,7 +221,7 @@
   // If the bit is not set, we explicitly recheck all the conditions.
   // If any of the conditions get falsified, it is important to clear the bit.
   bool use_fast_path = false;
-  if (is_mterp && self->UseMterp()) {
+  if (is_mterp) {
     use_fast_path = called_method->UseFastInterpreterToInterpreterInvoke();
     if (!use_fast_path) {
       use_fast_path = UseFastInterpreterToInterpreterInvoke(called_method);
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 912c444..fd1430a 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -546,12 +546,7 @@
 
 extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Check that we are using the right interpreter.
-  if (kIsDebugBuild && self->UseMterp() != CanUseMterp()) {
-    // The flag might be currently being updated on all threads. Retry with lock.
-    MutexLock tll_mu(self, *Locks::thread_list_lock_);
-    DCHECK_EQ(self->UseMterp(), CanUseMterp());
-  }
+  DCHECK(self->UseMterp());
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   uint16_t inst_data = inst->Fetch16(0);
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 5c5523d..de4826f 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -581,9 +581,9 @@
     const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
     oat += dex_file_location_size;
 
-    std::string dex_file_location = ResolveRelativeEncodedDexLocation(
-        abs_dex_location,
-        std::string(dex_file_location_data, dex_file_location_size));
+    std::string dex_file_location(dex_file_location_data, dex_file_location_size);
+    std::string dex_file_name =
+        ResolveRelativeEncodedDexLocation(abs_dex_location, dex_file_location);
 
     uint32_t dex_file_checksum;
     if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_checksum))) {
@@ -638,7 +638,7 @@
                                            error_msg,
                                            uncompressed_dex_files_.get());
         } else {
-          loaded = dex_file_loader.Open(dex_file_location.c_str(),
+          loaded = dex_file_loader.Open(dex_file_name.c_str(),
                                         dex_file_location,
                                         /*verify=*/ false,
                                         /*verify_checksum=*/ false,
@@ -819,7 +819,7 @@
         this, header->string_ids_size_, sizeof(GcRoot<mirror::String>), string_bss_mapping);
 
     std::string canonical_location =
-        DexFileLoader::GetDexCanonicalLocation(dex_file_location.c_str());
+        DexFileLoader::GetDexCanonicalLocation(dex_file_name.c_str());
 
     // Create the OatDexFile and add it to the owning container.
     OatDexFile* oat_dex_file = new OatDexFile(this,
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 2ffaf98..c7731f4 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -28,6 +28,7 @@
 #include "gc_root-inl.h"
 #include "interpreter/mterp/mterp.h"
 #include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -90,12 +91,23 @@
 }
 
 template<typename Action>
-void Runtime::DoAndMaybeSwitchInterpreter(Action lamda) {
-  MutexLock tll_mu(Thread::Current(), *Locks::thread_list_lock_);
-  lamda();
-  Runtime::Current()->GetThreadList()->ForEach([](Thread* thread, void*) {
-      thread->tls32_.use_mterp.store(interpreter::CanUseMterp());
-  }, nullptr);
+void Runtime::DoAndMaybeSwitchInterpreter(Action lambda) {
+  Thread* self = Thread::Current();
+  if (Runtime::Current()->IsShuttingDown(self) || Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    MutexLock tll_mu(self, *Locks::thread_list_lock_);
+    lambda();
+    Runtime::Current()->GetThreadList()->ForEach([](Thread* thread, void*) {
+        thread->tls32_.use_mterp.store(interpreter::CanUseMterp());
+    }, nullptr);
+  } else {
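+    // Suspend all threads so that each thread's use_mterp flag stays consistent with
+    // CanUseMterp() while the flags are updated (see the DCHECKs in interpreter_common.h
+    // and MterpCheckBefore).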
+    ScopedThreadStateChange tsc(self, kSuspended);
+    ScopedSuspendAll ssa(__FUNCTION__);
+    MutexLock tll_mu(self, *Locks::thread_list_lock_);
+    lambda();
+    Runtime::Current()->GetThreadList()->ForEach([](Thread* thread, void*) {
+        thread->tls32_.use_mterp.store(interpreter::CanUseMterp());
+    }, nullptr);
+  }
 }
 
 }  // namespace art
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7eac3d9..bd0e5a4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -34,6 +34,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
+#include <thread>
 #include <vector>
 
 #include "android-base/strings.h"
@@ -233,6 +234,7 @@
       class_linker_(nullptr),
       signal_catcher_(nullptr),
       java_vm_(nullptr),
+      thread_pool_ref_count_(0u),
       fault_message_(nullptr),
       threads_being_born_(0),
       shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)),
@@ -348,6 +350,8 @@
         << "\n";
   }
 
+  WaitForThreadPoolWorkersToStart();
+
   if (jit_ != nullptr) {
     // Wait for the workers to be created since there can't be any threads attaching during
     // shutdown.
@@ -400,6 +404,8 @@
     // JIT compiler threads.
     jit_->DeleteThreadPool();
   }
+  DeleteThreadPool();
+  CHECK(thread_pool_ == nullptr);
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
   GetRuntimeCallbacks()->StopDebugger();
@@ -930,6 +936,18 @@
 
   // Create the thread pools.
   heap_->CreateThreadPool();
+  {
+    ScopedTrace timing("CreateThreadPool");
+    constexpr size_t kStackSize = 64 * KB;
+    constexpr size_t kMaxRuntimeWorkers = 4u;
+    const size_t num_workers =
+        std::min(static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeWorkers);
+    MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+    CHECK(thread_pool_ == nullptr);
+    thread_pool_.reset(new ThreadPool("Runtime", num_workers, /*create_peers=*/false, kStackSize));
+    thread_pool_->StartWorkers(Thread::Current());
+  }
+
   // Reset the gc performance data at zygote fork so that the GCs
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
@@ -2658,4 +2676,45 @@
     GetClassLinker()->VisitClasses(&visitor);
   }
 }
+
+Runtime::ScopedThreadPoolUsage::ScopedThreadPoolUsage()
+    : thread_pool_(Runtime::Current()->AcquireThreadPool()) {}
+
+Runtime::ScopedThreadPoolUsage::~ScopedThreadPoolUsage() {
+  Runtime::Current()->ReleaseThreadPool();
+}
+
+bool Runtime::DeleteThreadPool() {
+  // Make sure workers are started to prevent thread shutdown errors.
+  WaitForThreadPoolWorkersToStart();
+  std::unique_ptr<ThreadPool> thread_pool;
+  {
+    MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+    if (thread_pool_ref_count_ == 0) {
+      thread_pool = std::move(thread_pool_);
+    }
+  }
+  return thread_pool != nullptr;
+}
+
+ThreadPool* Runtime::AcquireThreadPool() {
+  MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+  ++thread_pool_ref_count_;
+  return thread_pool_.get();
+}
+
+void Runtime::ReleaseThreadPool() {
+  MutexLock mu(Thread::Current(), *Locks::runtime_thread_pool_lock_);
+  CHECK_GT(thread_pool_ref_count_, 0u);
+  --thread_pool_ref_count_;
+}
+
+void Runtime::WaitForThreadPoolWorkersToStart() {
+  // Need to make sure workers are created before deleting the pool.
+  ScopedThreadPoolUsage stpu;
+  if (stpu.GetThreadPool() != nullptr) {
+    stpu.GetThreadPool()->WaitForWorkersToBeCreated();
+  }
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 76cfcd1..00158b8 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -669,7 +669,7 @@
   // It ensures that two calls do not interfere with each other and
   // it makes it possible to DCHECK that thread local flag is correct.
   template<typename Action>
-  static void DoAndMaybeSwitchInterpreter(Action lamda);
+  static void DoAndMaybeSwitchInterpreter(Action lambda);
 
   // Returns the build fingerprint, if set. Otherwise an empty string is returned.
   std::string GetFingerprint() {
@@ -792,6 +792,28 @@
     return verifier_logging_threshold_ms_;
   }
 
+  // Atomically delete the thread pool if the reference count is 0.
+  bool DeleteThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
+  // Wait for all the thread workers to be attached.
+  void WaitForThreadPoolWorkersToStart() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
+  // Scoped usage of the runtime thread pool. Prevents the pool from being
+  // deleted while in scope. Note that the thread pool is only used during startup
+  // and is deleted once startup completes.
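+  // Typical usage (see image_space.cc): construct a ScopedThreadPoolUsage, null-check
+  // GetThreadPool() before use, and let the destructor release the pool reference.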
+  class ScopedThreadPoolUsage {
+   public:
+    ScopedThreadPoolUsage();
+    ~ScopedThreadPoolUsage();
+
+    // Return the thread pool.
+    ThreadPool* GetThreadPool() const {
+      return thread_pool_;
+    }
+
+   private:
+    ThreadPool* const thread_pool_;
+  };
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -828,6 +850,9 @@
   //       friend).
   std::string GetFaultMessage();
 
+  ThreadPool* AcquireThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+  void ReleaseThreadPool() REQUIRES(!Locks::runtime_thread_pool_lock_);
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
@@ -911,6 +936,10 @@
   std::unique_ptr<jit::JitCodeCache> jit_code_cache_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
+  // Runtime thread pool. The pool is only used during startup and is deleted once
+  // startup completes.
+  std::unique_ptr<ThreadPool> thread_pool_ GUARDED_BY(Locks::runtime_thread_pool_lock_);
+  size_t thread_pool_ref_count_ GUARDED_BY(Locks::runtime_thread_pool_lock_);
+
   // Fault message, printed when we get a SIGSEGV. Stored as a native-heap object and accessed
   // lock-free, so needs to be atomic.
   std::atomic<std::string*> fault_message_;
@@ -1115,6 +1144,7 @@
 
   // Note: See comments on GetFaultMessage.
   friend std::string GetFaultMessageForAbortLogging();
+  friend class ScopedThreadPoolUsage;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc
index f2e5012..d08be72 100644
--- a/runtime/runtime_callbacks_test.cc
+++ b/runtime/runtime_callbacks_test.cc
@@ -147,6 +147,8 @@
   self->TransitionFromSuspendedToRunnable();
   bool started = runtime_->Start();
   ASSERT_TRUE(started);
+  // Make sure the workers are done starting so we don't get callbacks for them.
+  runtime_->WaitForThreadPoolWorkersToStart();
 
   cb_.state = CallbackState::kBase;  // Ignore main thread attach.
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f459f9c..a97e4cc 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4168,7 +4168,11 @@
 
 void Thread::SetAsyncException(ObjPtr<mirror::Throwable> new_exception) {
   CHECK(new_exception != nullptr);
-  Runtime::Current()->SetAsyncExceptionsThrown();
+  {
+    StackHandleScope<1> hs(Thread::Current());
+    auto h_exception = hs.NewHandleWrapper(&new_exception);
+    Runtime::Current()->SetAsyncExceptionsThrown();
+  }
   if (kIsDebugBuild) {
     // Make sure we are in a checkpoint.
     MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
diff --git a/test/1919-vminit-thread-start-timing/src/art/Test1919.java b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
index 3d5c079..f6b770f 100644
--- a/test/1919-vminit-thread-start-timing/src/art/Test1919.java
+++ b/test/1919-vminit-thread-start-timing/src/art/Test1919.java
@@ -21,10 +21,12 @@
 
   public static void run() {
     for (Event e : getEvents()) {
-      if (PRINT_ALL_THREADS ||
-          e.thr.equals(Thread.currentThread()) ||
-          e.thr.getName().equals("JVMTI_THREAD-Test1919")) {
-        System.out.println(e.name + ": " + e.thr.getName());
+      if (e.thr != null) {
+        if (PRINT_ALL_THREADS ||
+            e.thr.equals(Thread.currentThread()) ||
+            e.thr.getName().equals("JVMTI_THREAD-Test1919")) {
+          System.out.println(e.name + ": " + e.thr.getName());
+        }
       }
     }
   }