ART: Add object-readbarrier-inl.h

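Move the read-barrier helpers of mirror::Object (and the lock-word and
CAS helpers they rely on) out of object-inl.h into a new header,
mirror/object-readbarrier-inl.h. This prunes the include tree for the
concurrent-copying collector, whose -inl.h now includes the new header
instead of object-inl.h. Clean up other related includes.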

Test: mmma art
Change-Id: I40ce4e74f2e5d4c692529ffb4df933230b6fd73e
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index c975944..dc880b0 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -30,7 +30,7 @@
 #include "mirror/stack_trace_element.h"
 #include "oat_quick_method_header.h"
 #include "optimizing/stack_map_stream.h"
-#include "runtime.h"
+#include "runtime-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 21042a3..b34d938 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -27,6 +27,7 @@
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "indirect_reference_table.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mem_map.h"
 #include "mirror/class-inl.h"
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c840e70..5136d7d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -47,6 +47,7 @@
 #include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
 #include "graph_visualizer.h"
+#include "intern_table.h"
 #include "intrinsics.h"
 #include "leb128.h"
 #include "mirror/array-inl.h"
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index c0714d7..750f9cc 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -24,6 +24,7 @@
 #include "intrinsics_utils.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 788c162..4d36015 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -24,6 +24,7 @@
 #include "intrinsics.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index a68fafc..fd8a37a 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -22,6 +22,7 @@
 #include "common_arm.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 50ea33d..8e45747 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -27,6 +27,7 @@
 #include "intrinsics_utils.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 0ece86a..8ed2ad8 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -27,6 +27,7 @@
 #include "intrinsics_utils.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 2866d4b..011ef65 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -192,6 +192,7 @@
         "runtime_callbacks.cc",
         "runtime_common.cc",
         "runtime_options.cc",
+        "scoped_thread_state_change.cc",
         "signal_catcher.cc",
         "stack.cc",
         "stack_map.cc",
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index c9fb3ed..3c51f52 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -25,7 +25,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/iftable.h"
-#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index bee7d0b..ee33fc4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -65,6 +65,7 @@
 #include "imtable-inl.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/profile_compilation_info.h"
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 374b711..0891d3f 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -17,6 +17,7 @@
 #include "class_table.h"
 
 #include "mirror/class-inl.h"
+#include "oat_file.h"
 
 namespace art {
 
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 15724a1..01c6641 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -39,6 +39,7 @@
 #include "gtest/gtest.h"
 #include "handle_scope-inl.h"
 #include "interpreter/unstarted_runtime.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index b5130d7..c340a88 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -26,6 +26,7 @@
 #include "entrypoints/quick/callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
+#include "java_vm_ext.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index 546e59d..eeb138b 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -17,6 +17,7 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "java_vm_ext.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 854d0a5..8a9b11d 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -22,7 +22,7 @@
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/region_space.h"
-#include "mirror/object-inl.h"
+#include "mirror/object-readbarrier-inl.h"
 #include "lock_word.h"
 
 namespace art {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e08784d..df12d87 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -65,6 +65,7 @@
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "obj_ptr-inl.h"
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 65a550e..886c950 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -18,6 +18,7 @@
 
 #include "base/time_utils.h"
 #include "collector/garbage_collector.h"
+#include "java_vm_ext.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
diff --git a/runtime/gc/system_weak_test.cc b/runtime/gc/system_weak_test.cc
index 9b601c0..dfbbd2a 100644
--- a/runtime/gc/system_weak_test.cc
+++ b/runtime/gc/system_weak_test.cc
@@ -23,6 +23,7 @@
 #include "base/mutex.h"
 #include "collector_type.h"
 #include "common_runtime_test.h"
+#include "gc_root-inl.h"
 #include "handle_scope-inl.h"
 #include "heap.h"
 #include "mirror/string.h"
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index 24ee227..2128f8c 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -24,7 +24,6 @@
 #include "base/dumpable.h"
 #include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
-#include "runtime-inl.h"
 #include "verify_object.h"
 
 namespace art {
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 9fbb2e9..c852d5a 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -18,6 +18,7 @@
 
 #include "base/dumpable-inl.h"
 #include "base/systrace.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "nth_caller_visitor.h"
 #include "reference_table.h"
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index f0d0260..311515c7 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -18,6 +18,7 @@
 
 #include "base/hash_set.h"
 #include "common_runtime_test.h"
+#include "gc_root-inl.h"
 #include "mirror/object.h"
 #include "handle_scope-inl.h"
 #include "mirror/string.h"
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 3631a9d..b32b272 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -23,6 +23,7 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit_code_cache.h"
 #include "oat_file_manager.h"
 #include "oat_quick_method_header.h"
diff --git a/runtime/jni_env_ext-inl.h b/runtime/jni_env_ext-inl.h
index 004f824..25893b7 100644
--- a/runtime/jni_env_ext-inl.h
+++ b/runtime/jni_env_ext-inl.h
@@ -19,9 +19,7 @@
 
 #include "jni_env_ext.h"
 
-#include "indirect_reference_table-inl.h"
-#include "obj_ptr-inl.h"
-#include "utils.h"
+#include "mirror/object.h"
 
 namespace art {
 
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 4004c45..60e4295 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -23,12 +23,17 @@
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
 #include "object_callbacks.h"
+#include "obj_ptr.h"
 #include "reference_table.h"
 
 namespace art {
 
 class JavaVMExt;
 
+namespace mirror {
+class Object;
+}  // namespace mirror
+
 // Number of local references in the indirect reference table. The value is arbitrary but
 // low enough that it forces sanity checks.
 static constexpr size_t kLocalsInitial = 512;
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index be03f04..bfbd4df 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -26,6 +26,7 @@
 #include "base/logging.h"
 #include "class.h"
 #include "gc/heap-inl.h"
+#include "object-inl.h"
 #include "obj_ptr-inl.h"
 #include "thread.h"
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 2247f7d..6c723ef 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -30,6 +30,7 @@
 #include "gc/heap-inl.h"
 #include "iftable.h"
 #include "object_array-inl.h"
+#include "object-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
 #include "runtime.h"
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index aaa590b..baed5f1 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -32,6 +32,7 @@
 #include "monitor.h"
 #include "object_array-inl.h"
 #include "object_reference-inl.h"
+#include "object-readbarrier-inl.h"
 #include "obj_ptr-inl.h"
 #include "read_barrier-inl.h"
 #include "reference.h"
@@ -66,14 +67,6 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline LockWord Object::GetLockWord(bool as_volatile) {
-  if (as_volatile) {
-    return LockWord(GetField32Volatile<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  }
-  return LockWord(GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-}
-
-template<VerifyObjectFlags kVerifyFlags>
 inline void Object::SetLockWord(LockWord new_val, bool as_volatile) {
   // Force use of non-transactional mode and do not check.
   if (as_volatile) {
@@ -91,24 +84,12 @@
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
-inline bool Object::CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val) {
-  // Force use of non-transactional mode and do not check.
-  return CasFieldWeakRelaxed32<false, false>(
-      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
-}
-
 inline bool Object::CasLockWordWeakAcquire(LockWord old_val, LockWord new_val) {
   // Force use of non-transactional mode and do not check.
   return CasFieldWeakAcquire32<false, false>(
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
-inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
-  // Force use of non-transactional mode and do not check.
-  return CasFieldWeakRelease32<false, false>(
-      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
-}
-
 inline uint32_t Object::GetLockOwnerThreadId() {
   return Monitor::GetLockOwnerThreadId(this);
 }
@@ -141,84 +122,6 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-#if defined(__arm__)
-  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
-  uintptr_t result;
-  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
-  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
-  __asm__ __volatile__(
-      "ldr %[result], [%[obj], #4]\n\t"
-      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
-      // null, without them being able to assume that fact.
-      "eor %[fad], %[result], %[result]\n\t"
-      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
-      : [obj] "r" (obj));
-  DCHECK_EQ(*fake_address_dependency, 0U);
-  LockWord lw(static_cast<uint32_t>(result));
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#elif defined(__aarch64__)
-  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
-  uintptr_t result;
-  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
-  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
-  __asm__ __volatile__(
-      "ldr %w[result], [%[obj], #4]\n\t"
-      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
-      // null, without them being able to assume that fact.
-      "eor %[fad], %[result], %[result]\n\t"
-      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
-      : [obj] "r" (obj));
-  DCHECK_EQ(*fake_address_dependency, 0U);
-  LockWord lw(static_cast<uint32_t>(result));
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#elif defined(__i386__) || defined(__x86_64__)
-  LockWord lw = GetLockWord(false);
-  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
-  // reordering.
-  *fake_address_dependency = 0;
-  std::atomic_signal_fence(std::memory_order_acquire);
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#else
-  // MIPS32/MIPS64: use a memory barrier to prevent load-load reordering.
-  LockWord lw = GetLockWord(false);
-  *fake_address_dependency = 0;
-  std::atomic_thread_fence(std::memory_order_acquire);
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#endif
-}
-
-inline uint32_t Object::GetReadBarrierState() {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  DCHECK(kUseBakerReadBarrier);
-  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  uint32_t rb_state = lw.ReadBarrierState();
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  return rb_state;
-}
-
-inline uint32_t Object::GetReadBarrierStateAcquire() {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  uint32_t rb_state = lw.ReadBarrierState();
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  return rb_state;
-}
-
 inline uint32_t Object::GetMarkBit() {
 #ifdef USE_READ_BARRIER
   return GetLockWord(false).MarkBitState();
@@ -239,54 +142,6 @@
   SetLockWord(lw, false);
 }
 
-template<bool kCasRelease>
-inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  LockWord expected_lw;
-  LockWord new_lw;
-  do {
-    LockWord lw = GetLockWord(false);
-    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
-      // Lost the race.
-      return false;
-    }
-    expected_lw = lw;
-    expected_lw.SetReadBarrierState(expected_rb_state);
-    new_lw = lw;
-    new_lw.SetReadBarrierState(rb_state);
-    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
-    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
-    // an object and then changes the object from gray to black, the field updates (stores) will be
-    // visible (won't be reordered after this CAS.)
-  } while (!(kCasRelease ?
-             CasLockWordWeakRelease(expected_lw, new_lw) :
-             CasLockWordWeakRelaxed(expected_lw, new_lw)));
-  return true;
-}
-
-inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
-  LockWord expected_lw;
-  LockWord new_lw;
-  do {
-    LockWord lw = GetLockWord(false);
-    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
-      // Lost the race.
-      return false;
-    }
-    expected_lw = lw;
-    new_lw = lw;
-    new_lw.SetMarkBitState(mark_bit);
-    // Since this is only set from the mutator, we can use the non release Cas.
-  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
-  return true;
-}
-
-
 inline void Object::AssertReadBarrierState() const {
   CHECK(kUseBakerReadBarrier);
   Object* obj = const_cast<Object*>(this);
@@ -727,24 +582,6 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldWeakRelaxed32(MemberOffset field_offset,
-                                          int32_t old_value, int32_t new_value) {
-  if (kCheckTransaction) {
-    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
-  }
-  if (kTransactionActive) {
-    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
-  }
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
-  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
-
-  return atomic_addr->CompareExchangeWeakRelaxed(old_value, new_value);
-}
-
-template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakAcquire32(MemberOffset field_offset,
                                           int32_t old_value, int32_t new_value) {
   if (kCheckTransaction) {
@@ -1062,36 +899,6 @@
   return success;
 }
 
-template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
-    MemberOffset field_offset,
-    ObjPtr<Object> old_value,
-    ObjPtr<Object> new_value) {
-  if (kCheckTransaction) {
-    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
-  }
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  if (kVerifyFlags & kVerifyWrites) {
-    VerifyObject(new_value);
-  }
-  if (kVerifyFlags & kVerifyReads) {
-    VerifyObject(old_value);
-  }
-  if (kTransactionActive) {
-    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
-  }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
-  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-
-  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
-                                                           new_ref.reference_);
-  return success;
-}
-
 template<bool kIsStatic,
          VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption,
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
new file mode 100644
index 0000000..58e7c20
--- /dev/null
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
+#define ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
+
+#include "object.h"
+
+#include "atomic.h"
+#include "lock_word-inl.h"
+#include "object_reference-inl.h"
+#include "read_barrier.h"
+#include "runtime.h"
+
+namespace art {
+namespace mirror {
+
+template<VerifyObjectFlags kVerifyFlags>
+inline LockWord Object::GetLockWord(bool as_volatile) {
+  if (as_volatile) {
+    return LockWord(GetField32Volatile<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  }
+  return LockWord(GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelaxed32(MemberOffset field_offset,
+                                          int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+
+  return atomic_addr->CompareExchangeWeakRelaxed(old_value, new_value);
+}
+
+inline bool Object::CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val) {
+  // Force use of non-transactional mode and do not check.
+  return CasFieldWeakRelaxed32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
+}
+
+inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
+  // Force use of non-transactional mode and do not check.
+  return CasFieldWeakRelease32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
+}
+
+inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
+  if (!kUseBakerReadBarrier) {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+#if defined(__arm__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__aarch64__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %w[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__i386__) || defined(__x86_64__)
+  LockWord lw = GetLockWord(false);
+  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
+  // reordering.
+  *fake_address_dependency = 0;
+  std::atomic_signal_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#else
+  // MIPS32/MIPS64: use a memory barrier to prevent load-load reordering.
+  LockWord lw = GetLockWord(false);
+  *fake_address_dependency = 0;
+  std::atomic_thread_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#endif
+}
+
+inline uint32_t Object::GetReadBarrierState() {
+  if (!kUseBakerReadBarrier) {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  DCHECK(kUseBakerReadBarrier);
+  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
+}
+
+inline uint32_t Object::GetReadBarrierStateAcquire() {
+  if (!kUseBakerReadBarrier) {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
+}
+
+template<bool kCasRelease>
+inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
+  if (!kUseBakerReadBarrier) {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    expected_lw.SetReadBarrierState(expected_rb_state);
+    new_lw = lw;
+    new_lw.SetReadBarrierState(rb_state);
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
+  return true;
+}
+
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    new_lw = lw;
+    new_lw.SetMarkBitState(mark_bit);
+    // Since this is only set from the mutator, we can use the non-release CAS.
+  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+  return true;
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 3e04bf6..dbec40c 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -24,8 +24,9 @@
 #include "android-base/stringprintf.h"
 
 #include "array-inl.h"
+#include "class.h"
 #include "gc/heap.h"
-#include "mirror/class.h"
+#include "object-inl.h"
 #include "obj_ptr-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index f6a73a8..5c4e242 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -31,9 +31,12 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "gc/space/zygote_space.h"
+#include "handle_scope-inl.h"
 #include "hprof/hprof.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
+#include "mirror/object_array-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 34bbf32..ff4d931 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -46,6 +46,7 @@
 #include "gc/space/image_space.h"
 #include "gc/task_processor.h"
 #include "intern_table.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index db6f8ee..a7be73a 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -20,6 +20,7 @@
 
 #include <sys/stat.h>
 
+#include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 
 #include "base/logging.h"
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index bdaad20..0b93b07 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -35,28 +35,31 @@
 #include<stdio.h>
 #include <dlfcn.h>
 #include <limits.h>
-#include <unistd.h>
-
-#include "common_throws.h"
-#include "gc/heap.h"
-#include "thread.h"
-#include "thread_list.h"
-#include "runtime.h"
-#include "handle_scope-inl.h"
-#include "scoped_thread_state_change-inl.h"
-#include "ScopedUtfChars.h"
-#include "mirror/class_loader.h"
-#include "verify_object.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
-#include "jni_internal.h"
-#include "mirror/string-inl.h"
-#include "native/scoped_fast_native_object_access-inl.h"
-#include "ScopedLocalRef.h"
 #include <sys/time.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "handle_scope-inl.h"
+#include "java_vm_ext.h"
+#include "jni_internal.h"
+#include "mirror/class_loader.h"
+#include "mirror/string-inl.h"
+#include "monitor.h"
+#include "native/scoped_fast_native_object_access-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "scoped_thread_state_change-inl.h"
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+#include "verify_object.h"
 
 #undef LOG_TAG
 #define LOG_TAG "artopenjdk"
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 521494a..0ec92b7 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -36,10 +36,11 @@
 #include "gc/allocation_listener.h"
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
+#include "handle_scope-inl.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
 #include "mirror/class.h"
-#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index c102fb0..d3859b0 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -22,6 +22,7 @@
 #include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
+#include "mirror/object-readbarrier-inl.h"
 #include "mirror/reference.h"
 #include "runtime.h"
 #include "utils.h"
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 87bc7df..e16ef1d 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -23,6 +23,7 @@
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/executable.h"
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 2f70ded..1ba4b7b 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "common_compiler_test.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "scoped_thread_state_change-inl.h"
 
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 8346550..75c25dd 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -21,6 +21,7 @@
 
 #include "art_method.h"
 #include "class_linker.h"
+#include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
 #include "read_barrier-inl.h"
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a48a58d..93b416c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -83,6 +83,7 @@
 #include "instrumentation.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jni_internal.h"
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index c817a9e..ed6e349 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -19,6 +19,7 @@
 
 #include "scoped_thread_state_change.h"
 
+#include "base/casts.h"
 #include "jni_env_ext-inl.h"
 #include "obj_ptr-inl.h"
 #include "thread-inl.h"
@@ -74,8 +75,10 @@
 template<typename T>
 inline T ScopedObjectAccessAlreadyRunnable::AddLocalReference(ObjPtr<mirror::Object> obj) const {
   Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());
+  if (kIsDebugBuild) {  // Both checks below run only when kIsDebugBuild is true.
+    CHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
+    DCheckObjIsNotClearedJniWeakGlobal(obj);  // DCHECKs obj is not the cleared JNI weak global.
+  }
   return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
 }
 
diff --git a/runtime/scoped_thread_state_change.cc b/runtime/scoped_thread_state_change.cc
new file mode 100644
index 0000000..94354fc
--- /dev/null
+++ b/runtime/scoped_thread_state_change.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_thread_state_change.h"
+
+#include <type_traits>
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "java_vm_ext.h"
+#include "obj_ptr-inl.h"
+#include "runtime-inl.h"
+
+namespace art {
+
+// See ScopedObjectAccessAlreadyRunnable::ScopedObjectAccessAlreadyRunnable(JavaVM*).
+static_assert(std::is_base_of<JavaVM, JavaVMExt>::value, "JavaVMExt does not extend JavaVM");
+
+void ScopedObjectAccessAlreadyRunnable::DCheckObjIsNotClearedJniWeakGlobal(
+    ObjPtr<mirror::Object> obj) {
+  DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());  // Must not be the marker.
+}
+
+bool ScopedObjectAccessAlreadyRunnable::ForceCopy() const {
+  return vm_->ForceCopy();  // Out of line: the header only forward-declares JavaVMExt.
+}
+
+}  // namespace art
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 5f03741..02b6124 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -17,17 +17,23 @@
 #ifndef ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 #define ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 
-#include "art_field.h"
-#include "base/casts.h"
+#include "jni.h"
+
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "base/value_object.h"
-#include "java_vm_ext.h"
 #include "thread_state.h"
-#include "verify_object.h"
 
 namespace art {
 
+class JavaVMExt;
 struct JNIEnvExt;
 template<class MirrorType> class ObjPtr;
+class Thread;
+
+namespace mirror {
+class Object;
+}  // namespace mirror
 
 // Scoped change into and out of a particular state. Handles Runnable transitions that require
 // more complicated suspension checking. The subclasses ScopedObjectAccessUnchecked and
@@ -74,9 +80,7 @@
     return vm_;
   }
 
-  bool ForceCopy() const {
-    return vm_->ForceCopy();
-  }
+  bool ForceCopy() const;
 
   /*
    * Add a local reference for an object to the indirect reference table associated with the
@@ -105,12 +109,17 @@
 
   // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
   // change into Runnable or acquire a share on the mutator_lock_.
+  // Note: reinterpret_cast is used because down_cast would require the full JavaVMExt definition,
+  //       which is only forward-declared here. A static_assert in the cc file backs the cast.
   explicit ScopedObjectAccessAlreadyRunnable(JavaVM* vm)
-      : self_(nullptr), env_(nullptr), vm_(down_cast<JavaVMExt*>(vm)) {}
+      : self_(nullptr), env_(nullptr), vm_(reinterpret_cast<JavaVMExt*>(vm)) {}
 
   // Here purely to force inlining.
   ALWAYS_INLINE ~ScopedObjectAccessAlreadyRunnable() {}
 
+  static void DCheckObjIsNotClearedJniWeakGlobal(ObjPtr<mirror::Object> obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Self thread, can be null.
   Thread* const self_;
   // The full JNIEnv.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f887aaa..201701a 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -56,6 +56,7 @@
 #include "gc/space/space-inl.h"
 #include "handle_scope-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/class-inl.h"
diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc
index 34a31aa..9ba05bc 100644
--- a/test/497-inlining-and-class-loader/clear_dex_cache.cc
+++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc
@@ -17,6 +17,9 @@
 #include "art_method.h"
 #include "base/enums.h"
 #include "jni.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread.h"
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
index 290997a..19312b4 100644
--- a/test/900-hello-plugin/load_unload.cc
+++ b/test/900-hello-plugin/load_unload.cc
@@ -20,6 +20,8 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "java_vm_ext.h"
+#include "runtime.h"
 
 namespace art {