Various ART GC performance improvements for debug builds

Removed unnecessary read barriers in SizeOf and a few other places.
Disabled the debug disallow-read-barrier count check.
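
The SizeOf change relies on the fact that an object's size is derived from
immutable class metadata, so reading a stale from-space class pointer still
yields the correct size. The following is a simplified, hypothetical C++
sketch of that property; the types and functions (ClassDesc, Obj,
SizeOfWithReadBarrier, SizeOfWithoutReadBarrier) are invented for
illustration and are not ART code:

  #include <cassert>
  #include <cstddef>

  // Simplified model: object size comes only from class metadata that is
  // fixed at class load time and never mutated afterwards.
  struct ClassDesc {
    size_t object_size;    // Immutable after class initialization.
    ClassDesc* forwarded;  // Set when the GC copies this class to to-space.
  };

  struct Obj {
    ClassDesc* klass;      // May still point at the from-space copy.
  };

  // With a read barrier: always follow the forwarding pointer to to-space.
  size_t SizeOfWithReadBarrier(const Obj* obj) {
    ClassDesc* k = obj->klass;
    if (k->forwarded != nullptr) {
      k = k->forwarded;
    }
    return k->object_size;
  }

  // Without a read barrier: read whichever copy the field currently holds.
  size_t SizeOfWithoutReadBarrier(const Obj* obj) {
    return obj->klass->object_size;
  }

  int main() {
    ClassDesc to_space{64, nullptr};
    ClassDesc from_space{64, &to_space};  // Both copies report the same size.
    Obj obj{&from_space};
    // Same result either way; this is the property the commit relies on.
    assert(SizeOfWithReadBarrier(&obj) == SizeOfWithoutReadBarrier(&obj));
    return 0;
  }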

Before:
GC time: 15.817s
real  0m26.113s
user  1m16.780s
sys   0m3.152s

After:
GC time: 9.212s
real  0m19.875s
user  1m9.916s
sys   0m1.916s

Bug: 35644369
Test: test-art-host

Change-Id: I79a65259deff2a478a96e02ae69b14730b6dcbe6
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 24ba52f..bcf5008 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1108,7 +1108,9 @@
     space::RegionSpace* region_space = collector->RegionSpace();
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
-    obj->VisitReferences(visitor, visitor);
+    obj->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+        visitor,
+        visitor);
     if (kUseBakerReadBarrier) {
       CHECK_EQ(obj->GetReadBarrierState(), ReadBarrier::WhiteState())
           << "obj=" << obj << " non-white rb_state " << obj->GetReadBarrierState();
@@ -1232,7 +1234,9 @@
     CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
     collector->AssertToSpaceInvariant(nullptr, MemberOffset(0), obj);
     AssertToSpaceInvariantFieldVisitor visitor(collector);
-    obj->VisitReferences(visitor, visitor);
+    obj->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+        visitor,
+        visitor);
   }
 
  private:
@@ -1471,8 +1475,7 @@
     // Add to the live bytes per unevacuated from space. Note this code is always run by the
     // GC-running thread (no synchronization required).
     DCHECK(region_space_bitmap_->Test(to_ref));
-    // Disable the read barrier in SizeOf for performance, which is safe.
-    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags>();
     size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
     region_space_->AddLiveBytes(to_ref, alloc_size);
   }
@@ -1714,16 +1717,19 @@
 }
 
 // Assert the to-space invariant.
-void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
+void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj,
+                                               MemberOffset offset,
                                                mirror::Object* ref) {
-  CHECK(heap_->collector_type_ == kCollectorTypeCC) << static_cast<size_t>(heap_->collector_type_);
+  CHECK_EQ(heap_->collector_type_, kCollectorTypeCC);
   if (is_asserting_to_space_invariant_) {
-    if (region_space_->IsInToSpace(ref)) {
+    using RegionType = space::RegionSpace::RegionType;
+    space::RegionSpace::RegionType type = region_space_->GetRegionType(ref);
+    if (type == RegionType::kRegionTypeToSpace) {
       // OK.
       return;
-    } else if (region_space_->IsInUnevacFromSpace(ref)) {
+    } else if (type == RegionType::kRegionTypeUnevacFromSpace) {
       CHECK(IsMarkedInUnevacFromSpace(ref)) << ref;
-    } else if (region_space_->IsInFromSpace(ref)) {
+    } else if (UNLIKELY(type == RegionType::kRegionTypeFromSpace)) {
       // Not OK. Do extra logging.
       if (obj != nullptr) {
         LogFromSpaceRefHolder(obj, offset);
@@ -1888,10 +1894,10 @@
   explicit RefFieldsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(ObjPtr<mirror::Object> obj, MemberOffset offset, bool /* is_static */)
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
       const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
-    collector_->Process(obj.Ptr(), offset);
+    collector_->Process(obj, offset);
   }
 
   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
@@ -2064,7 +2070,7 @@
     AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object.Ptr());
     CHECK_EQ(byte_size, (java_lang_Object->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
     dummy_obj->SetClass(java_lang_Object.Ptr());
-    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()));
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone>()));
   } else {
     // Use an int array.
     dummy_obj->SetClass(int_array_class);
@@ -2075,7 +2081,7 @@
     CHECK_EQ(dummy_arr->GetLength(), length)
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
-    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()))
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone>()))
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
   }
@@ -2142,10 +2148,10 @@
 
 mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) {
   DCHECK(region_space_->IsInFromSpace(from_ref));
-  // No read barrier to avoid nested RB that might violate the to-space
-  // invariant. Note that from_ref is a from space ref so the SizeOf()
-  // call will access the from-space meta objects, but it's ok and necessary.
-  size_t obj_size = from_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+  // There must not be a read barrier to avoid nested RB that might violate the to-space invariant.
+  // Note that from_ref is a from space ref so the SizeOf() call will access the from-space meta
+  // objects, but it's ok and necessary.
+  size_t obj_size = from_ref->SizeOf<kDefaultVerifyFlags>();
   size_t region_space_alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
   size_t region_space_bytes_allocated = 0U;
   size_t non_moving_space_bytes_allocated = 0U;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 357541f..a853b98 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2337,9 +2337,7 @@
     size_t bin_size = object_addr - context->prev_;
     // Add the bin consisting of the end of the previous object to the start of the current object.
     collector->AddBin(bin_size, context->prev_);
-    // Turn off read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build.)
-    context->prev_ = object_addr + RoundUp(obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>(),
-                                           kObjectAlignment);
+    context->prev_ = object_addr + RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kObjectAlignment);
   }
 
   void AddBin(size_t size, uintptr_t position) {
@@ -2359,8 +2357,7 @@
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    // Turn off read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build.)
-    size_t obj_size = obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
     size_t alloc_size = RoundUp(obj_size, kObjectAlignment);
     mirror::Object* forward_address;
     // Find the smallest bin which we can move obj in.
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 811f1ea..f83645e 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -513,8 +513,11 @@
   return GetClass<kVerifyFlags>()->IsPhantomReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+template<VerifyObjectFlags kVerifyFlags>
 inline size_t Object::SizeOf() {
+  // A read barrier is never required for SizeOf since object sizes are constant. Reading from-space
+  // values is OK because of that.
+  static constexpr ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier;
   size_t result;
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
   if (IsArrayInstance<kVerifyFlags, kReadBarrierOption>()) {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index f7ab26d..417a22d 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -128,8 +128,7 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE bool InstanceOf(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   size_t SizeOf() REQUIRES_SHARED(Locks::mutator_lock_);
 
   Object* Clone(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 2b38b2e..c102fb0 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -28,12 +28,15 @@
 
 namespace art {
 
+// Disabled for performance reasons.
+static constexpr bool kCheckDebugDisallowReadBarrierCount = false;
+
 template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (kUseReadBarrier && with_read_barrier) {
-    if (kIsDebugBuild) {
+    if (kCheckDebugDisallowReadBarrierCount) {
       Thread* const self = Thread::Current();
       if (self != nullptr) {
         CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);