Use the lock word bits for Baker-style read barrier.

This enables the standard object header to be used with the
Baker-style read barrier.

Bug: 19355854
Bug: 12687968

Change-Id: Ie552b6e1dfe30e96cb1d0895bd0dff25f9d7d015
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 8057dd1..13921e7 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -124,7 +124,7 @@
 #define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
 ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
 
-#if defined(USE_BAKER_OR_BROOKS_READ_BARRIER)
+#if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
 #else
 #define MIRROR_OBJECT_HEADER_SIZE 8
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 7bee98f..5239d44 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -479,7 +479,7 @@
   ObjectOffsets() : CheckOffsets<mirror::Object>(false, "Ljava/lang/Object;") {
     addOffset(OFFSETOF_MEMBER(mirror::Object, klass_), "shadow$_klass_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, monitor_), "shadow$_monitor_");
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_rb_ptr_), "shadow$_x_rb_ptr_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_xpadding_), "shadow$_x_xpadding_");
 #endif
@@ -736,14 +736,14 @@
   EXPECT_FALSE(JavaLangObject->IsSynthetic());
   EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
   EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBakerOrBrooksReadBarrier) {
+  if (!kUseBrooksReadBarrier) {
     EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
   } else {
     EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
   }
   EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
   EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseBrooksReadBarrier) {
     EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
     EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
   }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index eabb1c2..26f349a 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -718,6 +718,7 @@
       // Leave References gray so that GetReferent() will trigger RB.
       CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
     } else {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
       if (kUseBakerReadBarrier) {
         if (region_space_->IsInToSpace(to_ref)) {
           // If to-space, change from gray to white.
@@ -739,6 +740,9 @@
           CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
         }
       }
+#else
+      DCHECK(!kUseBakerReadBarrier);
+#endif
     }
     if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
       ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
@@ -815,7 +819,7 @@
     DCHECK(obj != nullptr);
     DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
   }
 
@@ -963,7 +967,8 @@
     if (kUseBakerReadBarrier) {
       DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
       // Clear the black ptr.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
     }
     size_t obj_size = ref->SizeOf();
     size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
@@ -1330,10 +1335,6 @@
   while (true) {
     // Copy the object. TODO: copy only the lockword in the second iteration and on?
     memcpy(to_ref, from_ref, obj_size);
-    // Set the gray ptr.
-    if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
-    }
 
     LockWord old_lock_word = to_ref->GetLockWord(false);
 
@@ -1378,6 +1379,11 @@
       return to_ref;
     }
 
+    // Set the gray ptr.
+    if (kUseBakerReadBarrier) {
+      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
 
     // Try to atomically write the fwd ptr.
@@ -1484,6 +1490,21 @@
   }
   DCHECK(from_ref != nullptr);
   DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (kUseBakerReadBarrier && !is_active_) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as the from-space copy of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they look like gray but aren't really, the
+    // read barriers slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
   space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
   if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
     // It's already marked.
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 922a71c..7b19dc9 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -60,10 +60,6 @@
   if (obj == nullptr) {
     return;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
-  }
   if (from_space_->HasAddress(obj)) {
     mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
     // If the object has already been moved, return the new forward address.
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4c93a4c..4ba3983 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -96,11 +96,11 @@
         << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
     if (heap->ConcurrentCopyingCollector()->RegionSpace()->IsInToSpace(ref)) {
       // Moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
       // Non-moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::BlackPtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
     }
   }
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index d831bfb..341501b 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -53,6 +53,7 @@
 inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
     : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
              (kStateFat << kStateShift)) {
+  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 46c3bd4..655aa3a 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -94,6 +94,7 @@
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
+    // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
     kHashSize = 32 - kStateSize - kReadBarrierStateSize,
     kHashMask = (1 << kHashSize) - 1,
@@ -110,6 +111,7 @@
   static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
@@ -122,12 +124,14 @@
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((hash_code << kHashShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateHash << kStateShift));
   }
 
   static LockWord FromDefault(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord(rb_state << kReadBarrierStateShift);
   }
 
@@ -149,7 +153,8 @@
 
   LockState GetState() const {
     CheckReadBarrierState();
-    if (UNLIKELY(value_ == 0)) {
+    if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
+        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -171,6 +176,14 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  void SetReadBarrierState(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear and or the bits.
+    value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 2581fad..95c1d11 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -115,8 +115,11 @@
 }
 
 inline Object* Object::GetReadBarrierPointer() {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
       OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
@@ -126,8 +129,14 @@
 }
 
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord lw = GetLockWord(false);
+  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  SetLockWord(lw, false);
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
   SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
@@ -140,8 +149,27 @@
 }
 
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    expected_lw.SetReadBarrierState(
+        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    new_lw = lw;
+    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  } while (!CasLockWordWeakSequentiallyConsistent(expected_lw, new_lw));
+  return true;
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
   Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 343c9bc..7f162b7 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -62,7 +62,7 @@
 static constexpr bool kCheckFieldAssignments = false;
 
 // Size of Object.
-static constexpr uint32_t kObjectHeaderSize = kUseBakerOrBrooksReadBarrier ? 16 : 8;
+static constexpr uint32_t kObjectHeaderSize = kUseBrooksReadBarrier ? 16 : 8;
 
 // C++ mirror of java.lang.Object
 class MANAGED LOCKABLE Object {
@@ -94,6 +94,9 @@
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
+  NO_RETURN
+#endif
   bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -502,11 +505,11 @@
   // Monitor and hash code information.
   uint32_t monitor_;
 
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
   // Note names use a 'x' prefix and the x_rb_ptr_ is of type int
   // instead of Object to go with the alphabetical/by-type field order
   // on the Java side.
-  uint32_t x_rb_ptr_;      // For the Baker or Brooks pointer.
+  uint32_t x_rb_ptr_;      // For the Brooks pointer.
   uint32_t x_xpadding_;    // For 8-byte alignment. TODO: get rid of this.
 #endif