Relax CAS in some uses of Object::AtomicSetReadBarrierPointer.

Aside from the calls in ConcurrentCopying::ProcessMarkStackRef that
change a just-scanned object from gray to black (or gray to white),
the CAS in AtomicSetReadBarrierPointer doesn't need to be a release
CAS; relaxed ordering suffices. Template the function on a
kCasRelease flag (defaulting to false) so that only those call sites
pay for the release ordering. Also demote the CHECKs on this hot
path to DCHECKs and remove a couple of NO_RETURN annotations.
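
For intuition, a minimal sketch of the ordering argument using
std::atomic (hypothetical rb_state encoding and field store, not
ART's LockWord-based CAS helpers):

  #include <atomic>

  struct Obj {
    std::atomic<int> rb_state;  // 0 = white, 1 = gray, 2 = black
    int field;                  // stands in for the object's reference fields
  };

  // Gray -> black after scanning: the release CAS orders the field
  // stores before the color change, so a thread that observes black
  // (via an acquire load) also observes the updated fields.
  bool SetBlack(Obj* o) {
    o->field = 42;     // field updates done while the object is gray
    int expected = 1;  // gray
    return o->rb_state.compare_exchange_strong(
        expected, /*black*/ 2,
        std::memory_order_release, std::memory_order_relaxed);
  }

  // Other transitions publish no prior stores, so relaxed is enough.
  bool SetGray(Obj* o) {
    int expected = 0;  // white
    return o->rb_state.compare_exchange_strong(
        expected, /*gray*/ 1,
        std::memory_order_relaxed, std::memory_order_relaxed);
  }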

CC collector Ritz EAAC GC time: 28.4s -> 26.5s (-6.7%) on N5.

Bug: 12687968

Change-Id: Ic162717981b98e7f1e762fd6b27bd0c146187ca4
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index f4cf3ae..1b98164 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1078,7 +1078,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
@@ -1087,9 +1087,10 @@
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::WhitePtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::WhitePtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
@@ -1099,9 +1100,10 @@
         // indicate non-moving objects that have been marked
         // through. Note we'd need to change from black to white
         // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::BlackPtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::BlackPtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
@@ -1225,9 +1227,6 @@
  public:
   explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
       : collector_(cc) {}
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5c12091..4603428 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -163,6 +163,7 @@
 #endif
 }
 
+template<bool kCasRelease>
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
@@ -181,10 +182,13 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-    // This CAS is a CAS release so that when GC updates all the fields of an object and then
-    // changes the object from gray to black, the field updates (stores) will be visible (won't be
-    // reordered after this CAS.)
-  } while (!CasLockWordWeakRelease(expected_lw, new_lw));
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 5c6520f..71e704e 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -92,13 +92,13 @@
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
+
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
+
+  template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);