Reserve bits in the lock word for read barriers.

This prepares for the CC collector to use the standard object header
model by storing the read barrier state in the lock word.
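
For illustration, a minimal sketch of the invariant this introduces (all
identifiers below are the ones added by this change): thin locks carry the
rb bits alongside the owner and count, and fast paths mask them out before
comparing lock states.

    // Preserve the object's current read barrier state when thin-locking.
    LockWord lw = obj->GetLockWord(false);
    LockWord thin = LockWord::FromThinLockId(thread_id, /* count */ 0U,
                                             lw.ReadBarrierState());
    // Compare lock state only, ignoring the rb bits (28-29);
    // raw_value stands for the 32-bit monitor_ word.
    uint32_t state_only = raw_value & LockWord::kReadBarrierStateMaskShiftedToggled;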

Bug: 19355854
Bug: 12687968
Change-Id: Ia7585662dd2cebf0479a3e74f734afe5059fb70f
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 9cf005b..1a9dbea 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -161,7 +161,11 @@
     NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
         mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
-    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
+    // Zero out the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r3, 0, NULL);
+    // r1 is zero except for the rb bits here. Copy the read barrier bits into r2.
+    OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1);
     NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
         mirror::Object::MonitorOffset().Int32Value() >> 2);
     LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
@@ -189,7 +193,14 @@
     NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
         mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
-    OpRegImm(kOpCmp, rs_r1, 0);
+    // Zero out the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
+    // If the following cmp yields eq (the case in which r2 is used by
+    // the predicated strex), r1 is zero except for the rb bits. Copy
+    // the read barrier bits into r2.
+    OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1);
+    OpRegImm(kOpCmp, rs_r3, 0);
+
     LIR* it = OpIT(kCondEq, "");
     NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
         mirror::Object::MonitorOffset().Int32Value() >> 2);
@@ -228,14 +239,28 @@
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
-    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    if (!kUseReadBarrier) {
+      Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    } else {
+      NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+              mirror::Object::MonitorOffset().Int32Value() >> 2);
+    }
     MarkPossibleNullPointerException(opt_flags);
-    LoadConstantNoClobber(rs_r3, 0);
-    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
+    // Zero out the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
+    // Zero out everything but the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r3, rs_r2, NULL);
     GenMemBarrier(kAnyStore);
-    Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
-    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
-
+    LIR* unlock_success_branch;
+    if (!kUseReadBarrier) {
+      Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+      unlock_success_branch = OpUnconditionalBranch(NULL);
+    } else {
+      NewLIR4(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(),
+              mirror::Object::MonitorOffset().Int32Value() >> 2);
+      unlock_success_branch = OpCmpImmBranch(kCondEq, rs_r2, 0, NULL);
+    }
     LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
     slow_unlock_branch->target = slow_path_target;
     if (null_check_branch != nullptr) {
@@ -253,25 +278,57 @@
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
-    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    if (!kUseReadBarrier) {
+      Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    } else {
+      // If we use read barriers, we need to use atomic instructions.
+      NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+              mirror::Object::MonitorOffset().Int32Value() >> 2);
+    }
     MarkPossibleNullPointerException(opt_flags);
     Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    LoadConstantNoClobber(rs_r3, 0);
+    // Zero out the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled);
+    // Zero out everything but the read barrier bits.
+    OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
-    OpRegReg(kOpCmp, rs_r1, rs_r2);
-
-    LIR* it = OpIT(kCondEq, "EE");
-    if (GenMemBarrier(kAnyStore)) {
-      UpdateIT(it, "TEE");
+    OpRegReg(kOpCmp, rs_r3, rs_r2);
+    if (!kUseReadBarrier) {
+      LIR* it = OpIT(kCondEq, "EE");
+      if (GenMemBarrier(kAnyStore)) {
+        UpdateIT(it, "TEE");
+      }
+      Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+      // Go expensive route - UnlockObjectFromCode(obj);
+      LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
+                         rs_rARM_LR);
+      ClobberCallerSave();
+      LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
+      OpEndIT(it);
+      MarkSafepointPC(call_inst);
+    } else {
+      // If we use read barriers, we need to use atomic instructions.
+      LIR* it = OpIT(kCondEq, "");
+      if (GenMemBarrier(kAnyStore)) {
+        UpdateIT(it, "T");
+      }
+      NewLIR4/*eq*/(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(),
+                    mirror::Object::MonitorOffset().Int32Value() >> 2);
+      OpEndIT(it);
+      // Since we know r2 wasn't zero before the above IT instruction,
+      // if r2 is zero here, we know r3 was equal to r2 and the strex
+      // succeeded (we're done). Otherwise (either r3 wasn't equal to r2
+      // or the strex failed), call the entrypoint.
+      OpRegImm(kOpCmp, rs_r2, 0);
+      LIR* it2 = OpIT(kCondNe, "T");
+      // Go expensive route - UnlockObjectFromCode(obj);
+      LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
+                         rs_rARM_LR);
+      ClobberCallerSave();
+      LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
+      OpEndIT(it2);
+      MarkSafepointPC(call_inst);
     }
-    Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
-    // Go expensive route - UnlockObjectFromCode(obj);
-    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
-                       rs_rARM_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-    OpEndIT(it);
-    MarkSafepointPC(call_inst);
   }
 }
 
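Note: with read barriers enabled, the lock/unlock fast paths above (and the
assembly entrypoints changed below) can no longer update the lock word with a
plain store, since the GC may change the rb bits concurrently; the emitted
code amounts to a load-exclusive/store-exclusive loop. A rough sketch, using
illustrative names (load_exclusive, store_exclusive, RB_MASK are not real
identifiers):

    // pseudo-C for the shape of the emitted code
    do {
      old_word = load_exclusive(&obj->monitor_);            // ldrex
      if ((old_word & ~RB_MASK) != expected) goto slow;     // ignore rb bits
      new_word = (old_word & RB_MASK) | new_state;          // keep rb bits
    } while (!store_exclusive(&obj->monitor_, new_word));   // strex, retry
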
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 24e8fdf..15edcc5 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -172,7 +172,12 @@
   OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
   NewLIR2(kA64Ldxr2rX, rw3, rx2);
   MarkPossibleNullPointerException(opt_flags);
-  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL);
+  // Zero out the read barrier bits.
+  OpRegRegImm(kOpAnd, rs_w2, rs_w3, LockWord::kReadBarrierStateMaskShiftedToggled);
+  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w2, 0, NULL);
+  // w3 is zero except for the rb bits here. Copy the read barrier bits into w1.
+  OpRegRegReg(kOpOr, rs_w1, rs_w1, rs_w3);
+  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
   NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
   LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL);
 
@@ -217,13 +222,28 @@
     }
   }
   Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
-  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+  if (!kUseReadBarrier) {
+    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+  } else {
+    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
+    NewLIR2(kA64Ldxr2rX, rw2, rx3);
+  }
   MarkPossibleNullPointerException(opt_flags);
-  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
+  // Zero out the read barrier bits.
+  OpRegRegImm(kOpAnd, rs_w3, rs_w2, LockWord::kReadBarrierStateMaskShiftedToggled);
+  // Zero out everything but the read barrier bits.
+  OpRegRegImm(kOpAnd, rs_w2, rs_w2, LockWord::kReadBarrierStateMaskShifted);
+  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w3, rs_w1, NULL);
   GenMemBarrier(kAnyStore);
-  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr);
-  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
-
+  LIR* unlock_success_branch;
+  if (!kUseReadBarrier) {
+    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+    unlock_success_branch = OpUnconditionalBranch(NULL);
+  } else {
+    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
+    NewLIR3(kA64Stxr3wrX, rw1, rw2, rx3);
+    unlock_success_branch = OpCmpImmBranch(kCondEq, rs_w1, 0, NULL);
+  }
   LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
   slow_unlock_branch->target = slow_path_target;
   if (null_check_branch != nullptr) {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index f5f9320..b4732c8 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -909,7 +909,9 @@
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the objects that previously had hash codes.
   for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
-    hash_pair.first->SetLockWord(LockWord::FromHashCode(hash_pair.second), false);
+    Object* obj = hash_pair.first;
+    DCHECK_EQ(obj->GetLockWord(false).ReadBarrierState(), 0U);
+    obj->SetLockWord(LockWord::FromHashCode(hash_pair.second, 0U), false);
   }
   saved_hashes_.clear();
 }
@@ -935,7 +937,7 @@
   Object* copy = reinterpret_cast<Object*>(dst);
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
   // word.
-  copy->SetLockWord(LockWord(), false);
+  copy->SetLockWord(LockWord::Default(), false);
   image_writer->FixupObject(obj, copy);
 }
 
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fec1ce5..aff3880 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -469,26 +469,33 @@
 .Lretry_lock:
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   r1, .Lnot_unlocked         @ already thin locked
-    @ unlocked case - r2 holds thread id with count of 0
+    mov    r3, r1
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    cbnz   r3, .Lnot_unlocked         @ already thin locked
+    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
+    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 and the preserved read barrier bits
     strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   r3, .Lstrex_fail           @ store failed, retry
+    cbnz   r3, .Llock_strex_fail      @ store failed, retry
     dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
     bx lr
-.Lstrex_fail:
-    b .Lretry_lock                    @ unlikely forward branch, need to reload and recheck r1/r2
-.Lnot_unlocked:
-    lsr    r3, r1, 30
+.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
+    lsr    r3, r1, LOCK_WORD_STATE_SHIFT
     cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
     eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r2, r2                     @ zero top 16 bits
     cbnz   r2, .Lslow_lock            @ lock word and self thread id's match -> recursive lock
                                       @ else contention, go to slow path
-    add    r2, r1, #65536             @ increment count in lock word placing in r2 for storing
-    lsr    r1, r2, 30                 @ if either of the top two bits are set, we overflowed.
-    cbnz   r1, .Lslow_lock            @ if we overflow the count go slow path
-    str    r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+    mov    r3, r1                     @ copy the lock word to check count overflow.
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
+    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
+    lsr    r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) is set, we overflowed.
+    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
+    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
+    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
+    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
     bx lr
+.Llock_strex_fail:
+    b      .Lretry_lock               @ retry
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
@@ -505,23 +512,46 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     cbz    r0, .Lslow_unlock
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
     ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsr    r2, r1, 30
+#else
+    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
+#endif
+    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
-    eor    r3, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    mov    r3, r1                     @ copy lock word to check thread id equality
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r3, r3                     @ zero top 16 bits
     cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
-    cmp    r1, #65536
+    mov    r3, r1                     @ copy lock word to detect transition to unlocked
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
-    @ transition to unlocked, r3 holds 0
+    @ transition to unlocked
+    mov    r3, r1
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
     str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
+    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
+#endif
     bx     lr
-.Lrecursive_thin_unlock:
-    sub    r1, r1, #65536
+.Lrecursive_thin_unlock:  @ r1: original lock word
+    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
+#ifndef USE_READ_BARRIER
     str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
+    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
+#endif
     bx     lr
+.Lunlock_strex_fail:
+    b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 770073b5..382a4c2 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1000,25 +1000,33 @@
 .Lretry_lock:
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
     ldxr   w1, [x4]
-    cbnz   w1, .Lnot_unlocked         // already thin locked
+    mov    x3, x1
+    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    cbnz   w3, .Lnot_unlocked         // already thin locked
+    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
+    orr    x2, x1, x2                 // x2 holds thread id with count of 0 and the preserved read barrier bits
     stxr   w3, w2, [x4]
-    cbnz   w3, .Lstrex_fail           // store failed, retry
+    cbnz   w3, .Llock_stxr_fail       // store failed, retry
     dmb    ishld                      // full (LoadLoad|LoadStore) memory barrier
     ret
-.Lstrex_fail:
-    b .Lretry_lock                    // unlikely forward branch, need to reload and recheck r1/r2
-.Lnot_unlocked:
-    lsr    w3, w1, 30
+.Lnot_unlocked:  // x1: original lock word
+    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
     cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
     eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w2, w2                     // zero top 16 bits
     cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                       // else contention, go to slow path
-    add    w2, w1, #65536             // increment count in lock word placing in w2 for storing
-    lsr    w1, w2, 30                 // if either of the top two bits are set, we overflowed.
-    cbnz   w1, .Lslow_lock            // if we overflow the count go slow path
-    str    w2, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  // no need for stxr as we hold the lock
+    mov    x3, x1                     // copy the lock word to check count overflow.
+    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits.
+    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
+    lsr    w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  // if either of the upper two bits (28-29) is set, we overflowed.
+    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
+    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
+    stxr   w3, w2, [x4]
+    cbnz   w3, .Llock_stxr_fail       // store failed, retry
     ret
+.Llock_stxr_fail:
+    b      .Lretry_lock               // retry
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
@@ -1036,23 +1044,47 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     cbz    x0, .Lslow_unlock
-    ldr    w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsr    w2, w1, 30
+    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store take no immediate offset
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
+    ldr    w1, [x4]
+#else
+    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
+#endif
+    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
-    eor    w3, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
+    mov    x3, x1                     // copy lock word to check thread id equality
+    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w3, w3                     // zero top 16 bits
     cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
-    cmp    w1, #65536
+    mov    x3, x1                     // copy lock word to detect transition to unlocked
+    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
-    // transition to unlocked, w3 holds 0
+    // transition to unlocked
+    mov    x3, x1
+    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK  // w3: zero except for the preserved read barrier bits
     dmb    ish                        // full (LoadStore|StoreStore) memory barrier
-    str    w3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#ifndef USE_READ_BARRIER
+    str    w3, [x4]
+#else
+    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier
+    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
+#endif
     ret
-.Lrecursive_thin_unlock:
-    sub    w1, w1, #65536
-    str    w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+.Lrecursive_thin_unlock:  // w1: original lock word
+    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
+#ifndef USE_READ_BARRIER
+    str    w1, [x4]
+#else
+    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
+    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
+#endif
     ret
+.Lunlock_stxr_fail:
+    b      .Lretry_unlock             // retry
 .Lslow_unlock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index c2acdd1..c437428 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -922,29 +922,39 @@
     jz   .Lslow_lock
 .Lretry_lock:
     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx  // ecx := lock word
-    test LITERAL(0xC0000000), %ecx        // test the 2 high bits.
+    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // test the 2 high bits.
     jne  .Lslow_lock                      // slow path if either of the two high bits are set.
-    movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    movl %ecx, %edx                       // save lock word in edx to keep the read barrier bits.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // lock word contains a thin lock
-    // unlocked case - %edx holds thread id with count of 0
+    // unlocked case - edx: original lock word, eax: obj.
     movl %eax, %ecx                       // remember object in case of retry
-    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
-    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jnz  .Lcmpxchg_fail                   // cmpxchg failed retry
+    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
+    movl %fs:THREAD_ID_OFFSET, %edx       // load thread id.
+    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
+    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)  // eax: old val, edx: new val.
+    jnz  .Llock_cmpxchg_fail              // cmpxchg failed retry
     ret
-.Lcmpxchg_fail:
-    movl  %ecx, %eax                      // restore eax
-    jmp  .Lretry_lock
-.Lalready_thin:
+.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), eax: obj.
+    movl %fs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
-    addl LITERAL(65536), %ecx             // increment recursion count
-    test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
+    movl %edx, %ecx                       // copy the lock word to check count overflow.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count for overflow check.
+    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) is set.
     jne  .Lslow_lock                      // count overflowed so go slow
-    // update lockword, cmpxchg not necessary as we hold lock
-    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+    movl %eax, %ecx                       // save obj to use eax for cmpxchg.
+    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
+    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx  // increment recursion count again for real.
+    // update lockword, cmpxchg necessary for read barrier bits.
+    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)  // eax: old val, edx: new val.
+    jnz  .Llock_cmpxchg_fail              // cmpxchg failed retry
     ret
+.Llock_cmpxchg_fail:
+    movl  %ecx, %eax                      // restore eax
+    jmp  .Lretry_lock
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
@@ -963,20 +973,43 @@
 DEFINE_FUNCTION art_quick_unlock_object
     testl %eax, %eax                      // null check object/eax
     jz   .Lslow_unlock
+.Lretry_unlock:
     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx  // ecx := lock word
     movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test LITERAL(0xC0000000), %ecx
+    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
     jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
-    cmpl LITERAL(65536), %ecx
+    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
-    movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+    // update lockword, cmpxchg necessary for read barrier bits.
+    movl %eax, %edx                       // edx: obj
+    movl %ecx, %eax                       // eax: old lock word.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+#ifndef USE_READ_BARRIER
+    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+#else
+    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)  // eax: old val, ecx: new val.
+    jnz  .Lunlock_cmpxchg_fail            // cmpxchg failed retry
+#endif
     ret
-.Lrecursive_thin_unlock:
-    subl LITERAL(65536), %ecx
-    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+.Lrecursive_thin_unlock:  // ecx: original lock word, eax: obj
+    // update lockword, cmpxchg necessary for read barrier bits.
+    movl %eax, %edx                       // edx: obj
+    movl %ecx, %eax                       // eax: old lock word.
+    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // ecx: new lock word with decremented count.
+#ifndef USE_READ_BARRIER
+    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+#else
+    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)  // eax: old val, ecx: new val.
+    jnz  .Lunlock_cmpxchg_fail            // cmpxchg failed retry
+#endif
     ret
+.Lunlock_cmpxchg_fail:  // edx: obj
+    movl %edx, %eax                       // restore eax
+    jmp  .Lretry_unlock
 .Lslow_unlock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
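Note on the register shuffling above: lock cmpxchg compares %eax with the
memory operand implicitly; on success it stores the source register and sets
ZF, on failure it loads the observed value into %eax and clears ZF. In
pseudo-code (illustrative only; this is the ISA contract, not new code):

    // if (%eax == mem) { mem = %src; ZF = 1; }
    // else             { %eax = mem; ZF = 0; }

Hence obj is moved out of %eax, the expected old lock word is moved in, and
the fail paths restore %eax before jumping back to the retry label.
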
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index c865541..9b6b367 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -955,24 +955,33 @@
     jz   .Lslow_lock
 .Lretry_lock:
     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
-    test LITERAL(0xC0000000), %ecx        // Test the 2 high bits.
+    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
     jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
-    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    movl %ecx, %edx                       // save lock word in edx to keep the read barrier bits.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // Lock word contains a thin lock.
-    // unlocked case - %edx holds thread id with count of 0
-    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
+    // unlocked case - edx: original lock word, edi: obj.
+    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
+    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
     lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
     jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
-.Lalready_thin:
+.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
+    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
-    addl LITERAL(65536), %ecx             // increment recursion count
-    test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
+    movl %edx, %ecx                       // copy the lock word to check count overflow.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
+    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) is set
     jne  .Lslow_lock                      // count overflowed so go slow
-    // update lockword, cmpxchg not necessary as we hold lock
-    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
+    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
+    // update lockword, cmpxchg necessary for read barrier bits.
+    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
+    jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -985,19 +994,37 @@
 DEFINE_FUNCTION art_quick_unlock_object
     testl %edi, %edi                      // null check object/edi
     jz   .Lslow_unlock
+.Lretry_unlock:
     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test LITERAL(0xC0000000), %ecx
+    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
     jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
-    cmpl LITERAL(65536), %ecx
+    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
-    movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+    // update lockword, cmpxchg necessary for read barrier bits.
+    movl %ecx, %eax                       // eax: old lock word.
+    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+#ifndef USE_READ_BARRIER
+    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+#else
+    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
+    jnz  .Lretry_unlock                   // cmpxchg failed retry
+#endif
     ret
-.Lrecursive_thin_unlock:
-    subl LITERAL(65536), %ecx
+.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
+    // update lockword, cmpxchg necessary for read barrier bits.
+    movl %ecx, %eax                       // eax: old lock word.
+    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
+#ifndef USE_READ_BARRIER
     mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+#else
+    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
+    jnz  .Lretry_unlock                   // cmpxchg failed retry
+#endif
     ret
 .Lslow_unlock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index a35e05b..ee70fe7 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ASM_SUPPORT_H_
 
 #if defined(__cplusplus)
+#include "lock_word.h"
 #include "mirror/art_method.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -156,6 +157,27 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64,
             art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
 
+#define LOCK_WORD_STATE_SHIFT 30
+ADD_TEST_EQ(LOCK_WORD_STATE_SHIFT, static_cast<int32_t>(art::LockWord::kStateShift))
+
+#define LOCK_WORD_STATE_MASK 0xC0000000
+ADD_TEST_EQ(LOCK_WORD_STATE_MASK, static_cast<uint32_t>(art::LockWord::kStateMaskShifted))
+
+#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
+ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_SHIFT,
+            static_cast<int32_t>(art::LockWord::kReadBarrierStateShift))
+
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
+ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK,
+            static_cast<int32_t>(art::LockWord::kReadBarrierStateMaskShifted))
+
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xCFFFFFFF
+ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED,
+            static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled))
+
+#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
+ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
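
As a cross-check of the constants above (equivalent to the ADD_TEST_EQ
assertions, shown only for illustration):

    static_assert(0xC0000000 == (0x3U << 30), "state occupies bits 30-31");
    static_assert(0x30000000 == (0x3U << 28), "rb state occupies bits 28-29");
    static_assert(0xCFFFFFFF == ~(0x3U << 28), "toggled rb mask");
    static_assert(65536 == (1U << 16), "thin lock count one sits at bit 16");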
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 734c935..057eed1 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -484,14 +484,6 @@
   }
 }
 
-inline void ConcurrentCopying::SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref) {
-  DCHECK(region_space_->IsInFromSpace(from_ref));
-  DCHECK(region_space_->IsInToSpace(to_ref) || heap_->GetNonMovingSpace()->HasAddress(to_ref));
-  LockWord lw = from_ref->GetLockWord(false);
-  DCHECK_NE(lw.GetState(), LockWord::kForwardingAddress);
-  from_ref->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref)), false);
-}
-
 // The following visitors are used to verify that there are no
 // references to the from-space left after marking.
 class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index d0e0446..bbb551a 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -230,8 +230,6 @@
   bool IsOnAllocStack(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::Object* GetFwdPtr(mirror::Object* from_ref)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FlipThreadRoots() LOCKS_EXCLUDED(Locks::mutator_lock_);
   void SwapStacks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void RecordLiveStackFreezeSize(Thread* self);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 234bce5..d1ce0bc 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -91,7 +91,7 @@
   const size_t alloc_size = RoundUp(obj->SizeOf(), space::BumpPointerSpace::kAlignment);
   LockWord lock_word = obj->GetLockWord(false);
   // If we have a non empty lock word, store it and restore it later.
-  if (lock_word.GetValue() != LockWord().GetValue()) {
+  if (!LockWord::IsDefault(lock_word)) {
     // Set the bit in the bitmap so that we know to restore it later.
     objects_with_lockword_->Set(obj);
     lock_words_to_restore_.push_back(lock_word);
@@ -509,7 +509,7 @@
   // Use memmove since there may be overlap.
   memmove(reinterpret_cast<void*>(dest_addr), reinterpret_cast<const void*>(obj), len);
   // Restore the saved lock word if needed.
-  LockWord lock_word;
+  LockWord lock_word = LockWord::Default();
   if (UNLIKELY(objects_with_lockword_->Test(obj))) {
     lock_word = lock_words_to_restore_.front();
     lock_words_to_restore_.pop_front();
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index c52578f..d831bfb 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -24,17 +24,20 @@
 
 inline uint32_t LockWord::ThinLockOwner() const {
   DCHECK_EQ(GetState(), kThinLocked);
+  CheckReadBarrierState();
   return (value_ >> kThinLockOwnerShift) & kThinLockOwnerMask;
 }
 
 inline uint32_t LockWord::ThinLockCount() const {
   DCHECK_EQ(GetState(), kThinLocked);
+  CheckReadBarrierState();
   return (value_ >> kThinLockCountShift) & kThinLockCountMask;
 }
 
 inline Monitor* LockWord::FatLockMonitor() const {
   DCHECK_EQ(GetState(), kFatLocked);
-  MonitorId mon_id = value_ & ~(kStateMask << kStateShift);
+  CheckReadBarrierState();
+  MonitorId mon_id = (value_ >> kMonitorIdShift) & kMonitorIdMask;
   return MonitorPool::MonitorFromMonitorId(mon_id);
 }
 
@@ -47,14 +50,20 @@
   DCHECK_EQ(GetState(), kUnlocked);
 }
 
-inline LockWord::LockWord(Monitor* mon)
-    : value_(mon->GetMonitorId() | (kStateFat << kStateShift)) {
+inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
+    : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
+             (kStateFat << kStateShift)) {
+#ifndef __LP64__
+  DCHECK_ALIGNED(mon, kMonitorIdAlignment);
+#endif
   DCHECK_EQ(FatLockMonitor(), mon);
   DCHECK_LE(mon->GetMonitorId(), static_cast<uint32_t>(kMaxMonitorId));
+  CheckReadBarrierState();
 }
 
 inline int32_t LockWord::GetHashCode() const {
   DCHECK_EQ(GetState(), kHashCode);
+  CheckReadBarrierState();
   return (value_ >> kHashShift) & kHashMask;
 }
 
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 2d5c71b..46c3bd4 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -21,6 +21,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "read_barrier.h"
 #include "utils.h"
 
 namespace art {
@@ -31,34 +32,43 @@
 class Monitor;
 
 /* The lock value itself as stored in mirror::Object::monitor_.  The two most significant bits of
- * the state. The three possible states are fat locked, thin/unlocked, and hash code.
- * When the lock word is in the "thin" state and its bits are formatted as follows:
+ * the lock word encode the state. The four possible states are fat locked, thin/unlocked, hash
+ * code, and forwarding address. When the lock word is in the "thin" state, the bits are
+ * formatted as follows:
  *
- *  |33|22222222221111|1111110000000000|
- *  |10|98765432109876|5432109876543210|
- *  |00| lock count   |thread id owner |
+ *  |33|22|222222221111|1111110000000000|
+ *  |10|98|765432109876|5432109876543210|
+ *  |00|rb| lock count |thread id owner |
  *
  * When the lock word is in the "fat" state and its bits are formatted as follows:
  *
- *  |33|222222222211111111110000000000|
- *  |10|987654321098765432109876543210|
- *  |01| MonitorId                    |
+ *  |33|22|2222222211111111110000000000|
+ *  |10|98|7654321098765432109876543210|
+ *  |01|rb| MonitorId                  |
  *
  * When the lock word is in hash state and its bits are formatted as follows:
  *
- *  |33|222222222211111111110000000000|
- *  |10|987654321098765432109876543210|
- *  |10| HashCode                     |
+ *  |33|22|2222222211111111110000000000|
+ *  |10|98|7654321098765432109876543210|
+ *  |10|rb| HashCode                   |
+ *
+ * When the lock word is in the forwarding address state, the bits are formatted as follows:
+ *
+ *  |33|22|2222222211111111110000000000|
+ *  |10|98|7654321098765432109876543210|
+ *  |11| ForwardingAddress             |
+ *
+ * The rb bits store the read barrier state.
  */
 class LockWord {
  public:
   enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
+    kReadBarrierStateSize = 2,
     // Number of bits to encode the thin lock owner.
     kThinLockOwnerSize = 16,
     // Remaining bits are the recursive lock count.
-    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize,
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
     // Thin lock bits. Owner in lowest bits.
 
     kThinLockOwnerShift = 0,
@@ -68,28 +78,41 @@
     kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
     kThinLockCountMask = (1 << kThinLockCountSize) - 1,
     kThinLockMaxCount = kThinLockCountMask,
+    kThinLockCountOne = 1 << kThinLockCountShift,  // == 65536 (0x10000)
 
     // State in the highest bits.
-    kStateShift = kThinLockCountSize + kThinLockCountShift,
+    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
     kStateMask = (1 << kStateSize) - 1,
+    kStateMaskShifted = kStateMask << kStateShift,
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+    kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
+    kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
+    kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
+    kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
     kHashShift = 0,
-    kHashSize = 32 - kStateSize,
+    kHashSize = 32 - kStateSize - kReadBarrierStateSize,
     kHashMask = (1 << kHashSize) - 1,
     kMaxHash = kHashMask,
+
+    kMonitorIdShift = kHashShift,
+    kMonitorIdSize = kHashSize,
+    kMonitorIdMask = kHashMask,
+    kMonitorIdAlignmentShift = 32 - kMonitorIdSize,
+    kMonitorIdAlignment = 1 << kMonitorIdAlignmentShift,
     kMaxMonitorId = kMaxHash
   };
 
-  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count) {
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
     return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
-                     (kStateThinOrUnlocked << kStateShift));
+                    (rb_state << kReadBarrierStateShift) |
+                    (kStateThinOrUnlocked << kStateShift));
   }
 
   static LockWord FromForwardingAddress(size_t target) {
@@ -97,9 +120,23 @@
     return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
   }
 
-  static LockWord FromHashCode(uint32_t hash_code) {
+  static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
-    return LockWord((hash_code << kHashShift) | (kStateHash << kStateShift));
+    return LockWord((hash_code << kHashShift) |
+                    (rb_state << kReadBarrierStateShift) |
+                    (kStateHash << kStateShift));
+  }
+
+  static LockWord FromDefault(uint32_t rb_state) {
+    return LockWord(rb_state << kReadBarrierStateShift);
+  }
+
+  static bool IsDefault(LockWord lw) {
+    return LockWord().GetValue() == lw.GetValue();
+  }
+
+  static LockWord Default() {
+    return LockWord();
   }
 
   enum LockState {
@@ -111,6 +148,7 @@
   };
 
   LockState GetState() const {
+    CheckReadBarrierState();
     if (UNLIKELY(value_ == 0)) {
       return kUnlocked;
     } else {
@@ -129,6 +167,10 @@
     }
   }
 
+  uint32_t ReadBarrierState() const {
+    return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
@@ -141,25 +183,58 @@
   // Return the forwarding address stored in the monitor.
   size_t ForwardingAddress() const;
 
-  // Default constructor with no lock ownership.
-  LockWord();
-
   // Construct a lock word for inflation to use a Monitor.
-  explicit LockWord(Monitor* mon);
-
-  bool operator==(const LockWord& rhs) const {
-    return GetValue() == rhs.GetValue();
-  }
+  explicit LockWord(Monitor* mon, uint32_t rb_state);
 
   // Return the hash code stored in the lock word, must be kHashCode state.
   int32_t GetHashCode() const;
 
-  uint32_t GetValue() const {
-    return value_;
+  template <bool kIncludeReadBarrierState>
+  static bool Equal(LockWord lw1, LockWord lw2) {
+    if (kIncludeReadBarrierState) {
+      return lw1.GetValue() == lw2.GetValue();
+    }
+    return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
   }
 
  private:
-  explicit LockWord(uint32_t val) : value_(val) {}
+  // Default constructor with no lock ownership.
+  LockWord();
+
+  explicit LockWord(uint32_t val) : value_(val) {
+    CheckReadBarrierState();
+  }
+
+  // Disallow this in favor of explicit Equal() with the
+  // kIncludeReadBarrierState param to make clients aware of the
+  // read barrier state.
+  bool operator==(const LockWord& rhs) = delete;
+
+  void CheckReadBarrierState() const {
+    if (kIsDebugBuild && ((value_ >> kStateShift) & kStateMask) != kStateForwardingAddress) {
+      uint32_t rb_state = ReadBarrierState();
+      if (!kUseReadBarrier) {
+        DCHECK_EQ(rb_state, 0U);
+      } else {
+        DCHECK(rb_state == ReadBarrier::white_ptr_ ||
+               rb_state == ReadBarrier::gray_ptr_ ||
+               rb_state == ReadBarrier::black_ptr_) << rb_state;
+      }
+    }
+  }
+
+  // Note that GetValue() includes the read barrier bits, so comparing
+  // the GetValue() of two lock words with == may not correctly compare
+  // their lock states. Prefer Equal() or GetValueWithoutReadBarrierState().
+  uint32_t GetValue() const {
+    CheckReadBarrierState();
+    return value_;
+  }
+
+  uint32_t GetValueWithoutReadBarrierState() const {
+    CheckReadBarrierState();
+    return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+  }
 
   // Only Object should be converting LockWords to/from uints.
   friend class mirror::Object;
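
With GetValue() now including the rb bits and operator== deleted, callers
compare lock words explicitly; a sketch of the intended usage (mirroring the
transaction_test changes below):

    LockWord before = obj->GetLockWord(false);
    // ... lock/unlock activity; the GC may change the rb bits meanwhile ...
    LockWord after = obj->GetLockWord(false);
    CHECK(LockWord::Equal<false>(before, after));  // compare sans rb bits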
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 9262a3e..bbbdf98 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -159,7 +159,8 @@
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
         // loop iteration.
-        LockWord hash_word(LockWord::FromHashCode(GenerateIdentityHashCode()));
+        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
+                                                    lw.ReadBarrierState());
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
         if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 5ed8c7d..45a971d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -165,7 +165,7 @@
       return false;
     }
   }
-  LockWord fat(this);
+  LockWord fat(this, lw.ReadBarrierState());
   // Publish the updated lock word, which may race with other threads.
   bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
   // Lock profiling.
@@ -610,15 +610,22 @@
         return false;
       }
       // Deflate to a thin lock.
-      obj->SetLockWord(LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_), false);
+      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
+                                                 lw.ReadBarrierState());
+      // Assume no concurrent read barrier state changes as mutators are suspended.
+      obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
           << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
-      obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()), false);
+      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+      // Assume no concurrent read barrier state changes as mutators are suspended.
+      obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
     } else {
       // No lock and no hash, just put an empty lock word inside the object.
-      obj->SetLockWord(LockWord(), false);
+      LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+      // Assume no concurrent read barrier state changes as mutators are suspended.
+      obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated" << obj << " to empty lock word";
     }
     // The monitor is deflated, mark the object as nullptr so that we know to delete it during the
@@ -704,7 +711,7 @@
     LockWord lock_word = h_obj->GetLockWord(true);
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
-        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
         if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
           // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
@@ -717,9 +724,18 @@
           // We own the lock, increase the recursion count.
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
-            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-            h_obj->SetLockWord(thin_locked, true);
-            return h_obj.Get();  // Success!
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
+                                                          lock_word.ReadBarrierState()));
+            if (!kUseReadBarrier) {
+              h_obj->SetLockWord(thin_locked, true);
+              return h_obj.Get();  // Success!
+            } else {
+              // Use CAS to preserve the read barrier state.
+              if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
+                return h_obj.Get();  // Success!
+              }
+            }
+            continue;  // Go again.
           } else {
             // We'd overflow the recursion count, so inflate the monitor.
             InflateThinLocked(self, h_obj, lock_word, 0);
@@ -762,43 +778,57 @@
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
   obj = FakeUnlock(obj);
-  LockWord lock_word = obj->GetLockWord(true);
   StackHandleScope<1> hs(self);
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
-  switch (lock_word.GetState()) {
-    case LockWord::kHashCode:
-      // Fall-through.
-    case LockWord::kUnlocked:
-      FailedUnlock(h_obj.Get(), self, nullptr, nullptr);
-      return false;  // Failure.
-    case LockWord::kThinLocked: {
-      uint32_t thread_id = self->GetThreadId();
-      uint32_t owner_thread_id = lock_word.ThinLockOwner();
-      if (owner_thread_id != thread_id) {
-        // TODO: there's a race here with the owner dying while we unlock.
-        Thread* owner =
-            Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-        FailedUnlock(h_obj.Get(), self, owner, nullptr);
+  while (true) {
+    LockWord lock_word = obj->GetLockWord(true);
+    switch (lock_word.GetState()) {
+      case LockWord::kHashCode:
+        // Fall-through.
+      case LockWord::kUnlocked:
+        FailedUnlock(h_obj.Get(), self, nullptr, nullptr);
         return false;  // Failure.
-      } else {
-        // We own the lock, decrease the recursion count.
-        if (lock_word.ThinLockCount() != 0) {
-          uint32_t new_count = lock_word.ThinLockCount() - 1;
-          LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-          h_obj->SetLockWord(thin_locked, true);
+      case LockWord::kThinLocked: {
+        uint32_t thread_id = self->GetThreadId();
+        uint32_t owner_thread_id = lock_word.ThinLockOwner();
+        if (owner_thread_id != thread_id) {
+          // TODO: there's a race here with the owner dying while we unlock.
+          Thread* owner =
+              Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+          FailedUnlock(h_obj.Get(), self, owner, nullptr);
+          return false;  // Failure.
         } else {
-          h_obj->SetLockWord(LockWord(), true);
+          // We own the lock, decrease the recursion count.
+          LockWord new_lw = LockWord::Default();
+          if (lock_word.ThinLockCount() != 0) {
+            uint32_t new_count = lock_word.ThinLockCount() - 1;
+            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+          } else {
+            new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+          }
+          if (!kUseReadBarrier) {
+            DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
+            h_obj->SetLockWord(new_lw, true);
+            // Success!
+            return true;
+          } else {
+            // Use CAS to preserve the read barrier state.
+            if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) {
+              // Success!
+              return true;
+            }
+          }
+          continue;  // Go again.
         }
-        return true;  // Success!
       }
-    }
-    case LockWord::kFatLocked: {
-      Monitor* mon = lock_word.FatLockMonitor();
-      return mon->Unlock(self);
-    }
-    default: {
-      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
-      return false;
+      case LockWord::kFatLocked: {
+        Monitor* mon = lock_word.FatLockMonitor();
+        return mon->Unlock(self);
+      }
+      default: {
+        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+        return false;
+      }
     }
   }
 }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 0c5f8a4..2e065ae 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -22,12 +22,16 @@
 
 #include <iosfwd>
 #include <list>
+#ifndef __LP64__
+#include <malloc.h>  // For memalign.
+#endif
 #include <vector>
 
 #include "atomic.h"
 #include "base/allocator.h"
 #include "base/mutex.h"
 #include "gc_root.h"
+#include "lock_word.h"
 #include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread_state.h"
@@ -127,8 +131,17 @@
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
   static bool Deflate(Thread* self, mirror::Object* obj)
+      // Not exclusive because ImageWriter calls this during a Heap::VisitObjects() that
+      // does not allow a thread suspension in the middle. TODO: maybe make this exclusive.
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+#ifndef __LP64__
+  void* operator new(size_t size) {
+    // Align Monitor* as per the monitor ID field size in the lock word.
+    return memalign(LockWord::kMonitorIdAlignment, size);
+  }
+#endif
+
  private:
   explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 27678dc..8ae5a54 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -45,7 +45,9 @@
   static Monitor* CreateMonitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #ifndef __LP64__
-    return new Monitor(self, owner, obj, hash_code);
+    Monitor* mon = new Monitor(self, owner, obj, hash_code);
+    DCHECK_ALIGNED(mon, LockWord::kMonitorIdAlignment);
+    return mon;
 #else
     return GetMonitorPool()->CreateMonitorInPool(self, owner, obj, hash_code);
 #endif
@@ -71,7 +73,7 @@
 
   static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
 #ifndef __LP64__
-    return reinterpret_cast<Monitor*>(mon_id << 3);
+    return reinterpret_cast<Monitor*>(mon_id << LockWord::kMonitorIdAlignmentShift);
 #else
     return GetMonitorPool()->LookupMonitor(mon_id);
 #endif
@@ -79,7 +81,7 @@
 
   static MonitorId MonitorIdFromMonitor(Monitor* mon) {
 #ifndef __LP64__
-    return reinterpret_cast<MonitorId>(mon) >> 3;
+    return reinterpret_cast<MonitorId>(mon) >> LockWord::kMonitorIdAlignmentShift;
 #else
     return mon->GetMonitorId();
 #endif
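
The alignment change follows from the new layout: with two rb bits reserved,
kMonitorIdSize = 32 - 2 - 2 = 28, so kMonitorIdAlignmentShift = 32 - 28 = 4
and 32-bit monitors must be 16-byte aligned (the old code hard-coded >> 3,
i.e. 8-byte alignment), which the memalign-based operator new in monitor.h
provides. A sketch of the round-trip this guarantees:

    // 16-byte alignment <=> the low 4 bits of the pointer are zero,
    // so the shift loses no information.
    MonitorId id = reinterpret_cast<MonitorId>(mon) >> 4;
    Monitor* back = reinterpret_cast<Monitor*>(id << 4);  // back == mon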
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index 49efaa2..a2c4c36 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -35,6 +35,10 @@
 #define USE_BAKER_OR_BROOKS_READ_BARRIER
 #endif
 
+#if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER) || defined(USE_TABLE_LOOKUP_READ_BARRIER)
+#define USE_READ_BARRIER
+#endif
+
 #if defined(USE_BAKER_READ_BARRIER) && defined(USE_BROOKS_READ_BARRIER)
 #error "Only one of Baker or Brooks can be enabled at a time."
 #endif
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 83c5ffb..7c0e7d7 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -54,7 +54,7 @@
     : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_),
       suspend_all_historam_("suspend all histogram", 16, 64) {
-  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
+  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
 ThreadList::~ThreadList() {
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index b80fe22..5db51c8 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -63,7 +63,7 @@
     ASSERT_TRUE(h_klass->IsVerified());
 
     mirror::Class::Status old_status = h_klass->GetStatus();
-    uint32_t old_lock_word = h_klass->GetLockWord(false).GetValue();
+    LockWord old_lock_word = h_klass->GetLockWord(false);
 
     Transaction transaction;
     Runtime::Current()->EnterTransactionMode(&transaction);
@@ -75,8 +75,8 @@
     ASSERT_TRUE(transaction.IsAborted());
 
     // Check class's monitor get back to its original state without rolling back changes.
-    uint32_t new_lock_word = h_klass->GetLockWord(false).GetValue();
-    EXPECT_EQ(old_lock_word, new_lock_word);
+    LockWord new_lock_word = h_klass->GetLockWord(false);
+    EXPECT_TRUE(LockWord::Equal<false>(old_lock_word, new_lock_word));
 
     // Check class status is rolled back properly.
     soa.Self()->ClearException();
@@ -118,20 +118,20 @@
 
   // Lock object's monitor outside the transaction.
   h_obj->MonitorEnter(soa.Self());
-  uint32_t old_lock_word = h_obj->GetLockWord(false).GetValue();
+  LockWord old_lock_word = h_obj->GetLockWord(false);
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
   // Unlock object's monitor inside the transaction.
   h_obj->MonitorExit(soa.Self());
-  uint32_t new_lock_word = h_obj->GetLockWord(false).GetValue();
+  LockWord new_lock_word = h_obj->GetLockWord(false);
   Runtime::Current()->ExitTransactionMode();
 
   // Rolling back transaction's changes must not change monitor's state.
   transaction.Rollback();
-  uint32_t aborted_lock_word = h_obj->GetLockWord(false).GetValue();
-  EXPECT_NE(old_lock_word, new_lock_word);
-  EXPECT_EQ(aborted_lock_word, new_lock_word);
+  LockWord aborted_lock_word = h_obj->GetLockWord(false);
+  EXPECT_FALSE(LockWord::Equal<false>(old_lock_word, new_lock_word));
+  EXPECT_TRUE(LockWord::Equal<false>(aborted_lock_word, new_lock_word));
 }
 
 // Tests array's length is preserved after transaction rollback.