Reduce calls to DescriptorEquals

Store the low 3 bits of the descriptor hash in each class set entry and
compare these bits first, so most mismatches are rejected without a full
descriptor comparison.

Simpleperf profile, interpret-only compile of facebook:
mirror::Class::DescriptorEquals(char const*): 3.66% -> 1.03%

Bug: 32641252

Test: test-art-host

Change-Id: I8d898d4ac7c95383c49401fbcd85bfde226e026c
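
Illustration (not part of the patch): a minimal, self-contained C++ sketch of
the slot encoding, assuming 8-byte object alignment so the low 3 bits of a
class pointer are free to hold hash bits. The names sketch::Klass, Slot, and
SlotMatches are invented for this sketch; the real ClassTable::TableSlot below
stores the packed value in a 32-bit Atomic<uint32_t> and reads the class
through a GcRoot with a read barrier.

#include <cassert>
#include <cstdint>
#include <string>

namespace sketch {

// Assumed alignment: with 8-byte aligned classes, the low 3 pointer bits are
// zero and can carry the low bits of the descriptor hash.
constexpr uintptr_t kAlignment = 8;
constexpr uintptr_t kHashMask = kAlignment - 1;

struct alignas(kAlignment) Klass {
  std::string descriptor;  // stands in for mirror::Class::GetDescriptor()
};

class Slot {
 public:
  Slot(const Klass* klass, uint32_t descriptor_hash)
      : data_(reinterpret_cast<uintptr_t>(klass) |
              (descriptor_hash & kHashMask)) {
    assert((reinterpret_cast<uintptr_t>(klass) & kHashMask) == 0);
  }
  const Klass* Read() const {
    return reinterpret_cast<const Klass*>(data_ & ~kHashMask);
  }
  uintptr_t Hash() const { return data_ & kHashMask; }

 private:
  uintptr_t data_;  // pointer bits | low hash bits
};

// Fast path: if the stored hash bits differ from the query's, the descriptors
// cannot be equal, so the (comparatively expensive) string compare is skipped.
inline bool SlotMatches(const Slot& slot, const char* descriptor,
                        uint32_t hash) {
  if (slot.Hash() != (hash & kHashMask)) {
    return false;
  }
  return slot.Read()->descriptor == descriptor;
}

}  // namespace sketch

int main() {
  sketch::Klass object_class{"Ljava/lang/Object;"};
  sketch::Slot slot(&object_class, /*descriptor_hash=*/0x2a);
  assert(sketch::SlotMatches(slot, "Ljava/lang/Object;", 0x2a));
  // 0x2d & 7 != 0x2a & 7, so this is rejected before any string compare.
  assert(!sketch::SlotMatches(slot, "Ljava/lang/String;", 0x2d));
  return 0;
}
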
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7bb2bb7..0fb2a8b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1856,9 +1856,8 @@
     temp_class_table.ReadFromMemory(class_table_memory_ptr);
     CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
              table->NumZygoteClasses());
-    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                    RootInfo(kRootUnknown));
-    temp_class_table.VisitRoots(buffered_visitor);
+    UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(visitor);
   }
 }
 
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index db28a3f..cb5a790 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -605,8 +605,7 @@
   ClassTable temp_table;
   temp_table.ReadFromMemory(image_->Begin() + section.Offset());
   FixupRootVisitor visitor(this);
-  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&visitor, RootInfo(kRootUnknown));
-  temp_table.VisitRoots(buffered_visitor);
+  temp_table.VisitRoots(UnbufferedRootVisitor(&visitor, RootInfo(kRootUnknown)));
 }
 
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 674bad7..319991b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1352,12 +1352,12 @@
           ObjPtr<mirror::Class> klass = types[j].Read();
           if (space->HasAddress(klass.Ptr())) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-            auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
+            auto it = new_class_set->Find(ClassTable::TableSlot(klass));
             DCHECK(it != new_class_set->end());
             DCHECK_EQ(it->Read(), klass);
             ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
             if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
-              auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
+              auto it2 = new_class_set->Find(ClassTable::TableSlot(super_class));
               DCHECK(it2 != new_class_set->end());
               DCHECK_EQ(it2->Read(), super_class);
             }
@@ -1859,7 +1859,7 @@
     UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
                                                        class_loader.Get(),
                                                        forward_dex_cache_arrays);
-    for (GcRoot<mirror::Class>& root : temp_set) {
+    for (ClassTable::TableSlot& root : temp_set) {
       visitor(root.Read());
     }
     // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
@@ -1910,8 +1910,6 @@
   const bool tracing_enabled = Trace::IsTracingEnabled();
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
-      visitor, RootInfo(kRootStickyClass));
   if ((flags & kVisitRootFlagAllRoots) != 0) {
     // Argument for how root visiting deals with ArtField and ArtMethod roots.
     // There are 3 GC cases to handle:
@@ -1928,8 +1926,12 @@
     // Moving concurrent:
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
-    boot_class_table_.VisitRoots(buffered_visitor);
-
+    //
+    // Use an unbuffered visitor since the class table uses a temporary GcRoot for holding decoded
+    // ClassTable::TableSlot. The buffered root visiting would access a stale stack location for
+    // these objects.
+    UnbufferedRootVisitor root_visitor(visitor, RootInfo(kRootStickyClass));
+    boot_class_table_.VisitRoots(root_visitor);
     // If tracing is enabled, then mark all the class loaders to prevent unloading.
     if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) {
       for (const ClassLoaderData& data : class_loaders_) {
@@ -1946,7 +1948,6 @@
       CHECK_EQ(new_ref, old_ref);
     }
   }
-  buffered_visitor.Flush();  // Flush before clearing new_class_roots_.
   if ((flags & kVisitRootFlagClearRootLog) != 0) {
     new_class_roots_.clear();
   }
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index 3e54a64..229cd47 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -26,8 +26,8 @@
 void ClassTable::VisitRoots(Visitor& visitor) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      visitor.VisitRoot(root.AddressWithoutBarrier());
+    for (TableSlot& table_slot : class_set) {
+      table_slot.VisitRoot(visitor);
     }
   }
   for (GcRoot<mirror::Object>& root : strong_roots_) {
@@ -44,8 +44,8 @@
 void ClassTable::VisitRoots(const Visitor& visitor) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      visitor.VisitRoot(root.AddressWithoutBarrier());
+    for (TableSlot& table_slot : class_set) {
+      table_slot.VisitRoot(visitor);
     }
   }
   for (GcRoot<mirror::Object>& root : strong_roots_) {
@@ -62,8 +62,8 @@
 bool ClassTable::Visit(Visitor& visitor) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      if (!visitor(root.Read())) {
+    for (TableSlot& table_slot : class_set) {
+      if (!visitor(table_slot.Read())) {
         return false;
       }
     }
@@ -71,6 +71,51 @@
   return true;
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+inline mirror::Class* ClassTable::TableSlot::Read() const {
+  const uint32_t before = data_.LoadRelaxed();
+  ObjPtr<mirror::Class> const before_ptr(ExtractPtr(before));
+  ObjPtr<mirror::Class> const after_ptr(
+      GcRoot<mirror::Class>(before_ptr).Read<kReadBarrierOption>());
+  if (kReadBarrierOption != kWithoutReadBarrier && before_ptr != after_ptr) {
+    // If another thread raced and updated the reference, do not store the read barrier updated
+    // one.
+    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+  }
+  return after_ptr.Ptr();
+}
+
+template<typename Visitor>
+inline void ClassTable::TableSlot::VisitRoot(const Visitor& visitor) const {
+  const uint32_t before = data_.LoadRelaxed();
+  ObjPtr<mirror::Class> before_ptr(ExtractPtr(before));
+  GcRoot<mirror::Class> root(before_ptr);
+  visitor.VisitRoot(root.AddressWithoutBarrier());
+  ObjPtr<mirror::Class> after_ptr(root.Read<kWithoutReadBarrier>());
+  if (before_ptr != after_ptr) {
+    // If another thread raced and updated the reference, do not store the read barrier updated
+    // one.
+    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+  }
+}
+
+inline ObjPtr<mirror::Class> ClassTable::TableSlot::ExtractPtr(uint32_t data) {
+  return reinterpret_cast<mirror::Class*>(data & ~kHashMask);
+}
+
+inline uint32_t ClassTable::TableSlot::Encode(ObjPtr<mirror::Class> klass, uint32_t hash_bits) {
+  DCHECK_LE(hash_bits, kHashMask);
+  return reinterpret_cast<uintptr_t>(klass.Ptr()) | hash_bits;
+}
+
+inline ClassTable::TableSlot::TableSlot(ObjPtr<mirror::Class> klass, uint32_t descriptor_hash)
+    : data_(Encode(klass, MaskHash(descriptor_hash))) {
+  if (kIsDebugBuild) {
+    std::string temp;
+    const uint32_t hash = ComputeModifiedUtf8Hash(klass->GetDescriptor(&temp));
+    CHECK_EQ(descriptor_hash, hash);
+  }
+}
 
 }  // namespace art
 
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 0fcce6b..6eb7496 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -34,7 +34,7 @@
 bool ClassTable::Contains(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    auto it = class_set.Find(TableSlot(klass));
     if (it != class_set.end()) {
       return it->Read() == klass;
     }
@@ -45,7 +45,7 @@
 mirror::Class* ClassTable::LookupByDescriptor(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    auto it = class_set.Find(TableSlot(klass));
     if (it != class_set.end()) {
       return it->Read();
     }
@@ -60,10 +60,11 @@
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
-  auto existing_it = classes_.back().FindWithHash(descriptor, hash);
+  DescriptorHashPair pair(descriptor, hash);
+  auto existing_it = classes_.back().FindWithHash(pair, hash);
   if (kIsDebugBuild && existing_it == classes_.back().end()) {
     for (const ClassSet& class_set : classes_) {
-      if (class_set.FindWithHash(descriptor, hash) != class_set.end()) {
+      if (class_set.FindWithHash(pair, hash) != class_set.end()) {
         LOG(FATAL) << "Updating class found in frozen table " << descriptor;
       }
     }
@@ -77,7 +78,7 @@
   VerifyObject(klass);
   // Update the element in the hash set with the new class. This is safe to do since the descriptor
   // doesn't change.
-  *existing_it = GcRoot<mirror::Class>(klass);
+  *existing_it = TableSlot(klass, hash);
   return existing;
 }
 
@@ -99,8 +100,9 @@
 
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
   ReaderMutexLock mu(Thread::Current(), lock_);
+  DescriptorHashPair pair(descriptor, hash);
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.FindWithHash(descriptor, hash);
+    auto it = class_set.FindWithHash(pair, hash);
     if (it != class_set.end()) {
      return it->Read();
     }
@@ -110,22 +112,23 @@
 
 void ClassTable::Insert(ObjPtr<mirror::Class> klass) {
   WriterMutexLock mu(Thread::Current(), lock_);
-  classes_.back().Insert(GcRoot<mirror::Class>(klass));
+  classes_.back().Insert(TableSlot(klass));
 }
 
 void ClassTable::InsertWithoutLocks(ObjPtr<mirror::Class> klass) {
-  classes_.back().Insert(GcRoot<mirror::Class>(klass));
+  classes_.back().Insert(TableSlot(klass));
 }
 
 void ClassTable::InsertWithHash(ObjPtr<mirror::Class> klass, size_t hash) {
   WriterMutexLock mu(Thread::Current(), lock_);
-  classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
+  classes_.back().InsertWithHash(TableSlot(klass, hash), hash);
 }
 
 bool ClassTable::Remove(const char* descriptor) {
   WriterMutexLock mu(Thread::Current(), lock_);
+  DescriptorHashPair pair(descriptor, ComputeModifiedUtf8Hash(descriptor));
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.Find(descriptor);
+    auto it = class_set.Find(pair);
     if (it != class_set.end()) {
       class_set.Erase(it);
       return true;
@@ -134,26 +137,35 @@
   return false;
 }
 
-uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const TableSlot& slot)
     const {
   std::string temp;
-  return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
+  return ComputeModifiedUtf8Hash(slot.Read()->GetDescriptor(&temp));
 }
 
-bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                       const GcRoot<mirror::Class>& b) const {
+bool ClassTable::ClassDescriptorHashEquals::operator()(const TableSlot& a,
+                                                       const TableSlot& b) const {
+  if (a.Hash() != b.Hash()) {
+    std::string temp;
+    DCHECK(!a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp)));
+    return false;
+  }
   DCHECK_EQ(a.Read()->GetClassLoader(), b.Read()->GetClassLoader());
   std::string temp;
   return a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp));
 }
 
-bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                       const char* descriptor) const {
-  return a.Read()->DescriptorEquals(descriptor);
+bool ClassTable::ClassDescriptorHashEquals::operator()(const TableSlot& a,
+                                                       const DescriptorHashPair& b) const {
+  if (!a.MaskedHashEquals(b.second)) {
+    DCHECK(!a.Read()->DescriptorEquals(b.first));
+    return false;
+  }
+  return a.Read()->DescriptorEquals(b.first);
 }
 
-uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
-  return ComputeModifiedUtf8Hash(descriptor);
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const DescriptorHashPair& pair) const {
+  return ComputeModifiedUtf8Hash(pair.first);
 }
 
 bool ClassTable::InsertStrongRoot(ObjPtr<mirror::Object> obj) {
@@ -197,7 +209,7 @@
   // Combine all the class sets in case there are multiple, also adjusts load factor back to
   // default in case classes were pruned.
   for (const ClassSet& class_set : classes_) {
-    for (const GcRoot<mirror::Class>& root : class_set) {
+    for (const TableSlot& root : class_set) {
       combined.Insert(root);
     }
   }
@@ -227,4 +239,11 @@
   oat_files_.clear();
   strong_roots_.clear();
 }
+
+ClassTable::TableSlot::TableSlot(ObjPtr<mirror::Class> klass) {
+  std::string temp;
+  data_.StoreRelaxed(Encode(klass.Ptr(),
+                            MaskHash(ComputeModifiedUtf8Hash(klass->GetDescriptor(&temp)))));
+}
+
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 558c144..fe0bbb3 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -42,33 +42,91 @@
 // Each loader has a ClassTable
 class ClassTable {
  public:
+  class TableSlot {
+   public:
+    TableSlot() : data_(0u) {}
+
+    TableSlot(const TableSlot& copy) : data_(copy.data_.LoadRelaxed()) {}
+
+    explicit TableSlot(ObjPtr<mirror::Class> klass);
+
+    TableSlot(ObjPtr<mirror::Class> klass, uint32_t descriptor_hash);
+
+    TableSlot& operator=(const TableSlot& copy) {
+      data_.StoreRelaxed(copy.data_.LoadRelaxed());
+      return *this;
+    }
+
+    bool IsNull() const REQUIRES_SHARED(Locks::mutator_lock_) {
+      return Read<kWithoutReadBarrier>() == nullptr;
+    }
+
+    uint32_t Hash() const {
+      return MaskHash(data_.LoadRelaxed());
+    }
+
+    static uint32_t MaskHash(uint32_t hash) {
+      return hash & kHashMask;
+    }
+
+    bool MaskedHashEquals(uint32_t other) const {
+      return MaskHash(other) == Hash();
+    }
+
+    template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+    mirror::Class* Read() const REQUIRES_SHARED(Locks::mutator_lock_);
+
+    // NO_THREAD_SAFETY_ANALYSIS since the visitor may require heap bitmap lock.
+    template<typename Visitor>
+    void VisitRoot(const Visitor& visitor) const NO_THREAD_SAFETY_ANALYSIS;
+
+   private:
+    // Extract a raw pointer from an address.
+    static ObjPtr<mirror::Class> ExtractPtr(uint32_t data)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+
+    static uint32_t Encode(ObjPtr<mirror::Class> klass, uint32_t hash_bits)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+
+    // Data contains the class pointer GcRoot as well as the low bits of the descriptor hash.
+    mutable Atomic<uint32_t> data_;
+    static const uint32_t kHashMask = kObjectAlignment - 1;
+  };
+
+  using DescriptorHashPair = std::pair<const char*, uint32_t>;
+
   class ClassDescriptorHashEquals {
    public:
     // uint32_t for cross compilation.
-    uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
+    uint32_t operator()(const TableSlot& slot) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
-    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
+    bool operator()(const TableSlot& a, const TableSlot& b) const
         NO_THREAD_SAFETY_ANALYSIS;
     // Same descriptor.
-    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
+    bool operator()(const TableSlot& a, const DescriptorHashPair& b) const
         NO_THREAD_SAFETY_ANALYSIS;
     // uint32_t for cross compilation.
-    uint32_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+    uint32_t operator()(const DescriptorHashPair& pair) const NO_THREAD_SAFETY_ANALYSIS;
   };
-  class GcRootEmptyFn {
+
+  class TableSlotEmptyFn {
    public:
-    void MakeEmpty(GcRoot<mirror::Class>& item) const {
-      item = GcRoot<mirror::Class>();
+    void MakeEmpty(TableSlot& item) const NO_THREAD_SAFETY_ANALYSIS {
+      item = TableSlot();
+      DCHECK(IsEmpty(item));
     }
-    bool IsEmpty(const GcRoot<mirror::Class>& item) const {
+    bool IsEmpty(const TableSlot& item) const NO_THREAD_SAFETY_ANALYSIS {
       return item.IsNull();
     }
   };
-  // hash set which hashes class descriptor, and compares descriptors and class loaders. Results
-  // should be compared for a matching Class descriptor and class loader.
-  typedef HashSet<GcRoot<mirror::Class>, GcRootEmptyFn, ClassDescriptorHashEquals,
-      ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
-      ClassSet;
+
+  // Hash set that hashes class descriptor, and compares descriptors and class loaders. Results
+  // should be compared for a matching class descriptor and class loader.
+  typedef HashSet<TableSlot,
+                  TableSlotEmptyFn,
+                  ClassDescriptorHashEquals,
+                  ClassDescriptorHashEquals,
+                  TrackingAllocator<TableSlot, kAllocatorTagClassTable>> ClassSet;
 
   ClassTable();
 
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 85cd0a4..b795409 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -267,6 +267,43 @@
   size_t buffer_pos_;
 };
 
+class UnbufferedRootVisitor {
+ public:
+  UnbufferedRootVisitor(RootVisitor* visitor, const RootInfo& root_info)
+      : visitor_(visitor), root_info_(root_info) {}
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(GcRoot<MirrorType>& root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!root.IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<MirrorType>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  void VisitRoot(GcRoot<MirrorType>& root) const REQUIRES_SHARED(Locks::mutator_lock_) {
+    VisitRoot(root.AddressWithoutBarrier());
+  }
+
+  template <class MirrorType>
+  void VisitRoot(mirror::CompressedReference<MirrorType>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    visitor_->VisitRoots(&root, 1, root_info_);
+  }
+
+ private:
+  RootVisitor* const visitor_;
+  RootInfo root_info_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GC_ROOT_H_
diff --git a/runtime/oat.h b/runtime/oat.h
index 8c84d42..0f4cbbb7 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '2', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '3', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";