Merge "ART: Use the right ElfBuilder for oatdump symbolizer"
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index ad4ddad..8f5d3ae 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -101,6 +101,14 @@
     false,  // kIntrinsicCas
     false,  // kIntrinsicUnsafeGet
     false,  // kIntrinsicUnsafePut
+    false,  // kIntrinsicUnsafeGetAndAddInt
+    false,  // kIntrinsicUnsafeGetAndAddLong
+    false,  // kIntrinsicUnsafeGetAndSetInt
+    false,  // kIntrinsicUnsafeGetAndSetLong
+    false,  // kIntrinsicUnsafeGetAndSetObject
+    false,  // kIntrinsicUnsafeLoadFence
+    false,  // kIntrinsicUnsafeStoreFence
+    false,  // kIntrinsicUnsafeFullFence
     true,   // kIntrinsicSystemArrayCopyCharArray
     true,   // kIntrinsicSystemArrayCopy
 };
@@ -177,6 +185,14 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndAddInt], "UnsafeGetAndAddInt must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndAddLong], "UnsafeGetAndAddLong must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetInt], "UnsafeGetAndSetInt must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetLong], "UnsafeGetAndSetLong must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGetAndSetObject], "UnsafeGetAndSetObject must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeLoadFence], "UnsafeLoadFence must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeStoreFence], "UnsafeStoreFence must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeFullFence], "UnsafeFullFence must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopy],
@@ -318,6 +334,14 @@
     "putObject",             // kNameCachePutObject
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
+    "getAndAddInt",          // kNameCacheGetAndAddInt,
+    "getAndAddLong",         // kNameCacheGetAndAddLong,
+    "getAndSetInt",          // kNameCacheGetAndSetInt,
+    "getAndSetLong",         // kNameCacheGetAndSetLong,
+    "getAndSetObject",       // kNameCacheGetAndSetObject,
+    "loadFence",             // kNameCacheLoadFence,
+    "storeFence",            // kNameCacheStoreFence,
+    "fullFence",             // kNameCacheFullFence,
     "arraycopy",             // kNameCacheArrayCopy
     "bitCount",              // kNameCacheBitCount
     "compare",               // kNameCacheCompare
@@ -404,10 +428,14 @@
         kClassCacheJavaLangObject, kClassCacheJavaLangObject } },
     // kProtoCacheObjectJ_I
     { kClassCacheInt, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJI_I
+    { kClassCacheInt, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
     // kProtoCacheObjectJI_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
     // kProtoCacheObjectJ_J
     { kClassCacheLong, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJJ_J
+    { kClassCacheLong, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
     // kProtoCacheObjectJJ_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
     // kProtoCacheObjectJ_Object
@@ -415,6 +443,9 @@
     // kProtoCacheObjectJObject_V
     { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong,
         kClassCacheJavaLangObject } },
+    // kProtoCacheObjectJObject_Object
+    { kClassCacheJavaLangObject, 3, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject } },
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
         kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
@@ -609,6 +640,16 @@
     UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
 #undef UNSAFE_GET_PUT
 
+    // 1.8.
+    INTRINSIC(SunMiscUnsafe, GetAndAddInt, ObjectJI_I, kIntrinsicUnsafeGetAndAddInt, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndAddLong, ObjectJJ_J, kIntrinsicUnsafeGetAndAddLong, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetInt, ObjectJI_I, kIntrinsicUnsafeGetAndSetInt, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetLong, ObjectJJ_J, kIntrinsicUnsafeGetAndSetLong, 0),
+    INTRINSIC(SunMiscUnsafe, GetAndSetObject, ObjectJObject_Object, kIntrinsicUnsafeGetAndSetObject, 0),
+    INTRINSIC(SunMiscUnsafe, LoadFence, _V, kIntrinsicUnsafeLoadFence, 0),
+    INTRINSIC(SunMiscUnsafe, StoreFence, _V, kIntrinsicUnsafeStoreFence, 0),
+    INTRINSIC(SunMiscUnsafe, FullFence, _V, kIntrinsicUnsafeFullFence, 0),
+
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
     INTRINSIC(JavaLangSystem, ArrayCopy, ObjectIObjectII_V , kIntrinsicSystemArrayCopy,
@@ -815,6 +856,14 @@
     case kIntrinsicRotateRight:
     case kIntrinsicRotateLeft:
     case kIntrinsicSignum:
+    case kIntrinsicUnsafeGetAndAddInt:
+    case kIntrinsicUnsafeGetAndAddLong:
+    case kIntrinsicUnsafeGetAndSetInt:
+    case kIntrinsicUnsafeGetAndSetLong:
+    case kIntrinsicUnsafeGetAndSetObject:
+    case kIntrinsicUnsafeLoadFence:
+    case kIntrinsicUnsafeStoreFence:
+    case kIntrinsicUnsafeFullFence:
     case kIntrinsicSystemArrayCopy:
       return false;   // not implemented in quick.
     default:
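
Note on the dex_file_method_inliner.cc hunks above: they only teach the quick
compiler to recognize the 1.8 sun.misc.Unsafe additions; the switch at the end
deliberately returns false because quick never intrinsifies them. For
reference, the Java-level contract these intrinsics stand for can be sketched
with C++11 atomics (illustrative only, not ART code; names mirror the
intrinsic names above):

    #include <atomic>
    #include <cstdint>

    // getAndAdd*/getAndSet* return the *previous* value, like fetch_add/exchange.
    int32_t GetAndAddInt(std::atomic<int32_t>* addr, int32_t delta) {
      return addr->fetch_add(delta, std::memory_order_seq_cst);
    }
    int32_t GetAndSetInt(std::atomic<int32_t>* addr, int32_t value) {
      return addr->exchange(value, std::memory_order_seq_cst);
    }
    // The three fences map onto the standard memory-order fences.
    void LoadFence()  { std::atomic_thread_fence(std::memory_order_acquire); }
    void StoreFence() { std::atomic_thread_fence(std::memory_order_release); }
    void FullFence()  { std::atomic_thread_fence(std::memory_order_seq_cst); }
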
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index b465db2..34b56cd 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -227,6 +227,14 @@
       kNameCachePutObject,
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
+      kNameCacheGetAndAddInt,
+      kNameCacheGetAndAddLong,
+      kNameCacheGetAndSetInt,
+      kNameCacheGetAndSetLong,
+      kNameCacheGetAndSetObject,
+      kNameCacheLoadFence,
+      kNameCacheStoreFence,
+      kNameCacheFullFence,
       kNameCacheArrayCopy,
       kNameCacheBitCount,
       kNameCacheCompare,
@@ -282,11 +290,14 @@
       kProtoCacheObjectJJJ_Z,
       kProtoCacheObjectJObjectObject_Z,
       kProtoCacheObjectJ_I,
+      kProtoCacheObjectJI_I,
       kProtoCacheObjectJI_V,
       kProtoCacheObjectJ_J,
+      kProtoCacheObjectJJ_J,
       kProtoCacheObjectJJ_V,
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
+      kProtoCacheObjectJObject_Object,
       kProtoCacheCharArrayICharArrayII_V,
       kProtoCacheObjectIObjectII_V,
       kProtoCacheIICharArrayI_V,
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index dba9dd7..f204b28 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -443,7 +443,7 @@
 
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
-  uintptr_t NativeOffsetInImage(void* obj);
+  uintptr_t NativeOffsetInImage(void* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Location of where the object will be when the image is loaded at runtime.
   template <typename T>
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 3ed0278..5d4c4e2 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -472,6 +472,24 @@
       break;
     }
 
+    // 1.8.
+    case kIntrinsicUnsafeGetAndAddInt:
+      return Intrinsics::kUnsafeGetAndAddInt;
+    case kIntrinsicUnsafeGetAndAddLong:
+      return Intrinsics::kUnsafeGetAndAddLong;
+    case kIntrinsicUnsafeGetAndSetInt:
+      return Intrinsics::kUnsafeGetAndSetInt;
+    case kIntrinsicUnsafeGetAndSetLong:
+      return Intrinsics::kUnsafeGetAndSetLong;
+    case kIntrinsicUnsafeGetAndSetObject:
+      return Intrinsics::kUnsafeGetAndSetObject;
+    case kIntrinsicUnsafeLoadFence:
+      return Intrinsics::kUnsafeLoadFence;
+    case kIntrinsicUnsafeStoreFence:
+      return Intrinsics::kUnsafeStoreFence;
+    case kIntrinsicUnsafeFullFence:
+      return Intrinsics::kUnsafeFullFence;
+
     // Virtual cases.
 
     case kIntrinsicReferenceGetReferent:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 69c9708..b599d42 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2002,6 +2002,16 @@
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)
 
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(ARM, UnsafeFullFence)
+
 UNREACHABLE_INTRINSICS(ARM)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 934b427..ccbbd43 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1953,6 +1953,16 @@
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
 
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeFullFence)
+
 UNREACHABLE_INTRINSICS(ARM64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index b8933e1..dd9294d 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -128,6 +128,14 @@
   V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndAddInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndAddLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetAndSetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeLoadFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeStoreFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeFullFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow)
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
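
INTRINSICS_LIST above is an X-macro: each V(...) row fans out to the
Intrinsics enum, the translation switch in intrinsics.cc, and the per-backend
UNIMPLEMENTED_INTRINSIC stubs in the architecture files. A minimal sketch of
the pattern (simplified; the real rows also carry dispatch kind, environment,
side-effect, and throw flags):

    #define SKETCH_INTRINSICS_LIST(V) \
      V(UnsafeGetAndAddInt)           \
      V(UnsafeLoadFence)

    // Consumer 1: generate enumerators such as Intrinsics::kUnsafeLoadFence.
    enum class Intrinsics {
    #define AS_ENUM(Name) k##Name,
      SKETCH_INTRINSICS_LIST(AS_ENUM)
    #undef AS_ENUM
    };

    // Consumer 2: generate one stub per intrinsic, the way
    // UNIMPLEMENTED_INTRINSIC(ARM, UnsafeLoadFence) does for each backend.
    #define AS_STUB(Name) \
      void Visit##Name() { /* not intrinsified; falls back to a runtime call */ }
    SKETCH_INTRINSICS_LIST(AS_STUB)
    #undef AS_STUB
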
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 710df0a..697b8fe 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1832,9 +1832,17 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
 
-UNREACHABLE_INTRINSICS(MIPS)
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeFullFence)
 
-#undef UNIMPLEMENTED_INTRINSIC
+UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 617844b..83dff33 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1729,6 +1729,16 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(MIPS64, LongLowestOneBit)
 
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeFullFence)
+
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 9a2dc41..048590e 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2637,6 +2637,16 @@
 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
 
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(X86, UnsafeFullFence)
+
 UNREACHABLE_INTRINSICS(X86)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 75204b4..35e13a6 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2715,6 +2715,16 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeLoadFence)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeStoreFence)
+UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeFullFence)
+
 UNREACHABLE_INTRINSICS(X86_64)
 
 #undef __
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 12d6d8f..ebe89bb 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -463,6 +463,12 @@
       interface_method->VisitRoots(visitor, pointer_size);
     }
     visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+    if (!IsNative()) {
+      ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
+      if (profiling_info != nullptr) {
+        profiling_info->VisitRoots(visitor);
+      }
+    }
   }
 }
 
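Context for the art_method-inl.h hunk: a method's JIT ProfilingInfo caches
receiver classes in its inline caches, and those class pointers are GC roots
that VisitRoots must now report; native methods never allocate a
ProfilingInfo, hence the IsNative() guard. A self-contained sketch with
stand-in types (assumed shape; the real declarations live in
profiling_info.h):

    #include <vector>

    struct Class;                        // stand-in for mirror::Class
    struct RootVisitor {
      virtual void VisitRootIfNonNull(Class** root) = 0;
      virtual ~RootVisitor() = default;
    };
    struct InlineCache {
      static constexpr int kCacheSize = 5;
      Class* classes[kCacheSize] = {};   // receiver types seen at one call site
    };
    struct ProfilingInfoSketch {
      std::vector<InlineCache> caches;   // one entry per invoke instruction
      void VisitRoots(RootVisitor* visitor) {
        for (InlineCache& cache : caches) {
          for (Class*& klass : cache.classes) {
            visitor->VisitRootIfNonNull(&klass);  // GC marks/updates the class
          }
        }
      }
    };
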
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index a60f31e..f97ad51 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -377,7 +377,7 @@
 
   Runtime* runtime = Runtime::Current();
   const void* existing_entry_point = GetEntryPointFromQuickCompiledCode();
-  DCHECK(existing_entry_point != nullptr);
+  CHECK(existing_entry_point != nullptr) << PrettyMethod(this) << "@" << this;
   ClassLinker* class_linker = runtime->GetClassLinker();
 
   if (class_linker->IsQuickGenericJniStub(existing_entry_point)) {
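
The DCHECK-to-CHECK upgrade above matters because DCHECK compiles to a no-op
in release builds, while CHECK always fires, and the added << output names the
offending method. A simplified sketch of the distinction (ART's real macros in
base/logging.h also support streaming extra context):

    #include <cstdio>
    #include <cstdlib>

    #define CHECK_SKETCH(cond)                               \
      do {                                                   \
        if (!(cond)) {                                       \
          std::fprintf(stderr, "Check failed: %s\n", #cond); \
          std::abort();                                      \
        }                                                    \
      } while (false)

    #ifdef NDEBUG
    #define DCHECK_SKETCH(cond) do { } while (false)  // no-op in release
    #else
    #define DCHECK_SKETCH(cond) CHECK_SKETCH(cond)    // debug: same as CHECK
    #endif
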
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index a13a2e3..01d140a 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1207,217 +1207,222 @@
   Thread* const self = Thread::Current();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const ImageHeader& header = space->GetImageHeader();
-  // Add image classes into the class table for the class loader, and fixup the dex caches and
-  // class loader fields.
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
-  // Dex cache array fixup is all or nothing, we must reject app images that have mixed since we
-  // rely on clobering the dex cache arrays in the image to forward to bss.
-  size_t num_dex_caches_with_bss_arrays = 0;
-  const size_t num_dex_caches = dex_caches->GetLength();
-  for (size_t i = 0; i < num_dex_caches; i++) {
-    mirror::DexCache* const dex_cache = dex_caches->Get(i);
-    const DexFile* const dex_file = dex_cache->GetDexFile();
-    const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
-    if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
-      ++num_dex_caches_with_bss_arrays;
-    }
-  }
-  *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
-  if (*out_forward_dex_cache_array) {
-    if (num_dex_caches_with_bss_arrays != num_dex_caches) {
-      // Reject application image since we cannot forward only some of the dex cache arrays.
-      // TODO: We could get around this by having a dedicated forwarding slot. It should be an
-      // uncommon case.
-      *out_error_msg = StringPrintf("Dex caches in bss does not match total: %zu vs %zu",
-                                    num_dex_caches_with_bss_arrays,
-                                    num_dex_caches);
-      return false;
-    }
-  }
-  // Only add the classes to the class loader after the points where we can return false.
-  for (size_t i = 0; i < num_dex_caches; i++) {
-    mirror::DexCache* const dex_cache = dex_caches->Get(i);
-    const DexFile* const dex_file = dex_cache->GetDexFile();
-    const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
-    if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
-    // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
-      // copy over the arrays.
-      DCHECK(dex_file != nullptr);
-      const size_t num_strings = dex_file->NumStringIds();
-      const size_t num_types = dex_file->NumTypeIds();
-      const size_t num_methods = dex_file->NumMethodIds();
-      const size_t num_fields = dex_file->NumFieldIds();
-      CHECK_EQ(num_strings, dex_cache->NumStrings());
-      CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
-      CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
-      CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
-      DexCacheArraysLayout layout(image_pointer_size_, dex_file);
-      uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
-      // The space is not yet visible to the GC, we can avoid the read barriers and use std::copy_n.
-      if (num_strings != 0u) {
-        GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
-        GcRoot<mirror::String>* const strings =
-            reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
-        for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
-          DCHECK(strings[j].IsNull());
-        }
-        std::copy_n(image_resolved_strings, num_strings, strings);
-        dex_cache->SetStrings(strings);
-      }
-      if (num_types != 0u) {
-        GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
-        GcRoot<mirror::Class>* const types =
-            reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
-        for (size_t j = 0; kIsDebugBuild && j < num_types; ++j) {
-          DCHECK(types[j].IsNull());
-        }
-        std::copy_n(image_resolved_types, num_types, types);
-        // Store a pointer to the new location for fast ArtMethod patching without requiring map.
-        // This leaves random garbage at the start of the dex cache array, but nobody should ever
-        // read from it again.
-        *reinterpret_cast<GcRoot<mirror::Class>**>(image_resolved_types) = types;
-        dex_cache->SetResolvedTypes(types);
-      }
-      if (num_methods != 0u) {
-        ArtMethod** const methods = reinterpret_cast<ArtMethod**>(
-            raw_arrays + layout.MethodsOffset());
-        ArtMethod** const image_resolved_methods = dex_cache->GetResolvedMethods();
-        for (size_t j = 0; kIsDebugBuild && j < num_methods; ++j) {
-          DCHECK(methods[j] == nullptr);
-        }
-        std::copy_n(image_resolved_methods, num_methods, methods);
-        // Store a pointer to the new location for fast ArtMethod patching without requiring map.
-        *reinterpret_cast<ArtMethod***>(image_resolved_methods) = methods;
-        dex_cache->SetResolvedMethods(methods);
-      }
-      if (num_fields != 0u) {
-        ArtField** const fields = reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
-        for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
-          DCHECK(fields[j] == nullptr);
-        }
-        std::copy_n(dex_cache->GetResolvedFields(), num_fields, fields);
-        dex_cache->SetResolvedFields(fields);
+  {
+    // Add image classes into the class table for the class loader, and fixup the dex caches and
+    // class loader fields.
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
+    // Dex cache array fixup is all or nothing, we must reject app images that have mixed since we
+    // rely on clobbering the dex cache arrays in the image to forward to bss.
+    size_t num_dex_caches_with_bss_arrays = 0;
+    const size_t num_dex_caches = dex_caches->GetLength();
+    for (size_t i = 0; i < num_dex_caches; i++) {
+      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      const DexFile* const dex_file = dex_cache->GetDexFile();
+      const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+      if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+        ++num_dex_caches_with_bss_arrays;
       }
     }
-    {
-      WriterMutexLock mu2(self, dex_lock_);
-      // Make sure to do this after we update the arrays since we store the resolved types array
-      // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
-      // BSS.
-      mirror::DexCache* existing_dex_cache = FindDexCacheLocked(self,
-                                                                *dex_file,
-                                                                /*allow_failure*/true);
-      CHECK(existing_dex_cache == nullptr);
-      StackHandleScope<1> hs3(self);
-      RegisterDexFileLocked(*dex_file, hs3.NewHandle(dex_cache));
+    *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
+    if (*out_forward_dex_cache_array) {
+      if (num_dex_caches_with_bss_arrays != num_dex_caches) {
+        // Reject application image since we cannot forward only some of the dex cache arrays.
+        // TODO: We could get around this by having a dedicated forwarding slot. It should be an
+        // uncommon case.
+        *out_error_msg = StringPrintf("Dex caches in bss does not match total: %zu vs %zu",
+                                      num_dex_caches_with_bss_arrays,
+                                      num_dex_caches);
+        return false;
+      }
     }
-    GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
-    const size_t num_types = dex_cache->NumResolvedTypes();
-    if (new_class_set == nullptr) {
-      for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
-        // The image space is not yet added to the heap, avoid read barriers.
-        mirror::Class* klass = types[j].Read();
-        if (klass != nullptr) {
-          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-          // Update the class loader from the one in the image class loader to the one that loaded
-          // the app image.
-          klass->SetClassLoader(class_loader.Get());
-          // The resolved type could be from another dex cache, go through the dex cache just in
-          // case. May be null for array classes.
-          if (klass->GetDexCacheStrings() != nullptr) {
-            DCHECK(!klass->IsArrayClass());
-            klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+    // Only add the classes to the class loader after the points where we can return false.
+    for (size_t i = 0; i < num_dex_caches; i++) {
+      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      const DexFile* const dex_file = dex_cache->GetDexFile();
+      const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+      if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+        // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
+        // copy over the arrays.
+        DCHECK(dex_file != nullptr);
+        const size_t num_strings = dex_file->NumStringIds();
+        const size_t num_types = dex_file->NumTypeIds();
+        const size_t num_methods = dex_file->NumMethodIds();
+        const size_t num_fields = dex_file->NumFieldIds();
+        CHECK_EQ(num_strings, dex_cache->NumStrings());
+        CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
+        CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
+        CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
+        DexCacheArraysLayout layout(image_pointer_size_, dex_file);
+        uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
+        // The space is not yet visible to the GC, we can avoid the read barriers and use
+        // std::copy_n.
+        if (num_strings != 0u) {
+          GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
+          GcRoot<mirror::String>* const strings =
+              reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
+          for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
+            DCHECK(strings[j].IsNull());
           }
-          // If there are multiple dex caches, there may be the same class multiple times
-          // in different dex caches. Check for this since inserting will add duplicates
-          // otherwise.
-          if (num_dex_caches > 1) {
-            mirror::Class* existing = table->LookupByDescriptor(klass);
-            if (existing != nullptr) {
-              DCHECK_EQ(existing, klass) << PrettyClass(klass);
+          std::copy_n(image_resolved_strings, num_strings, strings);
+          dex_cache->SetStrings(strings);
+        }
+        if (num_types != 0u) {
+          GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
+          GcRoot<mirror::Class>* const types =
+              reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
+          for (size_t j = 0; kIsDebugBuild && j < num_types; ++j) {
+            DCHECK(types[j].IsNull());
+          }
+          std::copy_n(image_resolved_types, num_types, types);
+          // Store a pointer to the new location for fast ArtMethod patching without requiring map.
+          // This leaves random garbage at the start of the dex cache array, but nobody should ever
+          // read from it again.
+          *reinterpret_cast<GcRoot<mirror::Class>**>(image_resolved_types) = types;
+          dex_cache->SetResolvedTypes(types);
+        }
+        if (num_methods != 0u) {
+          ArtMethod** const methods = reinterpret_cast<ArtMethod**>(
+              raw_arrays + layout.MethodsOffset());
+          ArtMethod** const image_resolved_methods = dex_cache->GetResolvedMethods();
+          for (size_t j = 0; kIsDebugBuild && j < num_methods; ++j) {
+            DCHECK(methods[j] == nullptr);
+          }
+          std::copy_n(image_resolved_methods, num_methods, methods);
+          // Store a pointer to the new location for fast ArtMethod patching without requiring map.
+          *reinterpret_cast<ArtMethod***>(image_resolved_methods) = methods;
+          dex_cache->SetResolvedMethods(methods);
+        }
+        if (num_fields != 0u) {
+          ArtField** const fields =
+              reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+          for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
+            DCHECK(fields[j] == nullptr);
+          }
+          std::copy_n(dex_cache->GetResolvedFields(), num_fields, fields);
+          dex_cache->SetResolvedFields(fields);
+        }
+      }
+      {
+        WriterMutexLock mu2(self, dex_lock_);
+        // Make sure to do this after we update the arrays since we store the resolved types array
+        // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
+        // BSS.
+        mirror::DexCache* existing_dex_cache = FindDexCacheLocked(self,
+                                                                  *dex_file,
+                                                                  /*allow_failure*/true);
+        CHECK(existing_dex_cache == nullptr);
+        StackHandleScope<1> hs3(self);
+        RegisterDexFileLocked(*dex_file, hs3.NewHandle(dex_cache));
+      }
+      GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
+      const size_t num_types = dex_cache->NumResolvedTypes();
+      if (new_class_set == nullptr) {
+        for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+          // The image space is not yet added to the heap, avoid read barriers.
+          mirror::Class* klass = types[j].Read();
+          if (klass != nullptr) {
+            DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+            // Update the class loader from the one in the image class loader to the one that loaded
+            // the app image.
+            klass->SetClassLoader(class_loader.Get());
+            // The resolved type could be from another dex cache, go through the dex cache just in
+            // case. May be null for array classes.
+            if (klass->GetDexCacheStrings() != nullptr) {
+              DCHECK(!klass->IsArrayClass());
+              klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+            }
+            // If there are multiple dex caches, there may be the same class multiple times
+            // in different dex caches. Check for this since inserting will add duplicates
+            // otherwise.
+            if (num_dex_caches > 1) {
+              mirror::Class* existing = table->LookupByDescriptor(klass);
+              if (existing != nullptr) {
+                DCHECK_EQ(existing, klass) << PrettyClass(klass);
+              } else {
+                table->Insert(klass);
+              }
             } else {
               table->Insert(klass);
             }
-          } else {
-            table->Insert(klass);
-          }
-          // Double checked VLOG to avoid overhead.
-          if (VLOG_IS_ON(image)) {
-            VLOG(image) << PrettyClass(klass) << " " << klass->GetStatus();
-            if (!klass->IsArrayClass()) {
-              VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
-            }
-            VLOG(image) << "Direct methods";
-            for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
-              VLOG(image) << PrettyMethod(&m);
-            }
-            VLOG(image) << "Virtual methods";
-            for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
-              VLOG(image) << PrettyMethod(&m);
+            // Double checked VLOG to avoid overhead.
+            if (VLOG_IS_ON(image)) {
+              VLOG(image) << PrettyClass(klass) << " " << klass->GetStatus();
+              if (!klass->IsArrayClass()) {
+                VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
+              }
+              VLOG(image) << "Direct methods";
+              for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+                VLOG(image) << PrettyMethod(&m);
+              }
+              VLOG(image) << "Virtual methods";
+              for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+                VLOG(image) << PrettyMethod(&m);
+              }
             }
           }
         }
       }
-    }
-    if (kIsDebugBuild) {
-      for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
-        // The image space is not yet added to the heap, avoid read barriers.
-        mirror::Class* klass = types[j].Read();
-        if (klass != nullptr) {
-          DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-          if (kIsDebugBuild) {
-            if (new_class_set != nullptr)   {
-              auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
-              DCHECK(it != new_class_set->end());
-              DCHECK_EQ(it->Read(), klass);
-              mirror::Class* super_class = klass->GetSuperClass();
-              if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
-                auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
-                DCHECK(it2 != new_class_set->end());
-                DCHECK_EQ(it2->Read(), super_class);
-              }
-            } else {
-              DCHECK_EQ(table->LookupByDescriptor(klass), klass);
-              mirror::Class* super_class = klass->GetSuperClass();
-              if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
-                CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
+      if (kIsDebugBuild) {
+        for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
+          // The image space is not yet added to the heap, avoid read barriers.
+          mirror::Class* klass = types[j].Read();
+          if (klass != nullptr) {
+            DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
+            if (kIsDebugBuild) {
+              if (new_class_set != nullptr) {
+                auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
+                DCHECK(it != new_class_set->end());
+                DCHECK_EQ(it->Read(), klass);
+                mirror::Class* super_class = klass->GetSuperClass();
+                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+                  auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
+                  DCHECK(it2 != new_class_set->end());
+                  DCHECK_EQ(it2->Read(), super_class);
+                }
+              } else {
+                DCHECK_EQ(table->LookupByDescriptor(klass), klass);
+                mirror::Class* super_class = klass->GetSuperClass();
+                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+                  CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
+                }
               }
             }
-          }
-          if (kIsDebugBuild) {
-            for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
-              const void* code = m.GetEntryPointFromQuickCompiledCode();
-              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-              if (!IsQuickResolutionStub(code) &&
-                  !IsQuickGenericJniStub(code) &&
-                  !IsQuickToInterpreterBridge(code) &&
-                  !m.IsNative()) {
-                DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+            if (kIsDebugBuild) {
+              for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+                const void* code = m.GetEntryPointFromQuickCompiledCode();
+                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+                if (!IsQuickResolutionStub(code) &&
+                    !IsQuickGenericJniStub(code) &&
+                    !IsQuickToInterpreterBridge(code) &&
+                    !m.IsNative()) {
+                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+                }
               }
-            }
-            for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
-              const void* code = m.GetEntryPointFromQuickCompiledCode();
-              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-              if (!IsQuickResolutionStub(code) &&
-                  !IsQuickGenericJniStub(code) &&
-                  !IsQuickToInterpreterBridge(code) &&
-                  !m.IsNative()) {
-                DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+              for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+                const void* code = m.GetEntryPointFromQuickCompiledCode();
+                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+                if (!IsQuickResolutionStub(code) &&
+                    !IsQuickGenericJniStub(code) &&
+                    !IsQuickToInterpreterBridge(code) &&
+                    !m.IsNative()) {
+                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
+                }
               }
             }
           }
         }
       }
     }
-  }
-  if (*out_forward_dex_cache_array) {
-    FixupArtMethodArrayVisitor visitor(header);
-    header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor,
-        space->Begin(),
-        sizeof(void*));
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+    if (*out_forward_dex_cache_array) {
+      ScopedTrace timing("Fixup ArtMethod dex cache arrays");
+      FixupArtMethodArrayVisitor visitor(header);
+      header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
+          &visitor,
+          space->Begin(),
+          sizeof(void*));
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+    }
   }
   return true;
 }
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 7727b2d..6beb606 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -131,7 +131,7 @@
 
 class BitmapSetSlowPathVisitor {
  public:
-  void operator()(const mirror::Object* obj) const {
+  void operator()(const mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
     // Marking a large object, make sure its aligned as a sanity check.
     if (!IsAligned<kPageSize>(obj)) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3480483..faa3d3b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1545,7 +1545,6 @@
 }
 
 void Heap::DumpSpaces(std::ostream& stream) const {
-  ScopedObjectAccess soa(Thread::Current());
   for (const auto& space : continuous_spaces_) {
     accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 889069d..e0a53a0 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -651,8 +651,8 @@
     }
   }
 
-  std::string DumpSpaces() const WARN_UNUSED;
-  void DumpSpaces(std::ostream& stream) const;
+  void DumpSpaces(std::ostream& stream) const SHARED_REQUIRES(Locks::mutator_lock_);
+  std::string DumpSpaces() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Dump object should only be used by the signal handler.
   void DumpObject(std::ostream& stream, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
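
The heap.h hunk above and the large_object_space.cc hunk below are two halves
of one fix: DumpSpaces no longer takes ScopedObjectAccess itself; it is
annotated SHARED_REQUIRES(Locks::mutator_lock_) and callers must hold the
lock. Clang's -Wthread-safety analysis enforces this at compile time; a
minimal sketch of the underlying attributes (ART wraps them in its own
macros):

    // Compile with: clang++ -Wthread-safety -c sketch.cc
    struct __attribute__((capability("mutex"))) SketchMutex {
      void ReaderLock()   __attribute__((acquire_shared_capability()));
      void ReaderUnlock() __attribute__((release_shared_capability()));
    };

    SketchMutex mutator_lock;

    // Callers must hold mutator_lock at least shared, or Clang warns.
    void DumpSpaces() __attribute__((requires_shared_capability(mutator_lock)));

    void Caller() {
      mutator_lock.ReaderLock();    // ScopedObjectAccess plays this role in ART
      DumpSpaces();
      mutator_lock.ReaderUnlock();
    }
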
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index e70fe21..010f677 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -27,6 +27,7 @@
 #include "base/stl_util.h"
 #include "image.h"
 #include "os.h"
+#include "scoped_thread_state_change.h"
 #include "space-inl.h"
 #include "thread-inl.h"
 
@@ -190,6 +191,7 @@
   MutexLock mu(self, lock_);
   auto it = large_objects_.find(ptr);
   if (UNLIKELY(it == large_objects_.end())) {
+    ScopedObjectAccess soa(self);
     Runtime::Current()->GetHeap()->DumpSpaces(LOG(INTERNAL_FATAL));
     LOG(FATAL) << "Attempted to free large object " << ptr << " which was not live";
   }
diff --git a/runtime/interpreter/mterp/mips/binop.S b/runtime/interpreter/mterp/mips/binop.S
index ce09da45..66627e2 100644
--- a/runtime/interpreter/mterp/mips/binop.S
+++ b/runtime/interpreter/mterp/mips/binop.S
@@ -7,8 +7,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
diff --git a/runtime/interpreter/mterp/mips64/bincmp.S b/runtime/interpreter/mterp/mips64/bincmp.S
index d39c900..aa5e74b 100644
--- a/runtime/interpreter/mterp/mips64/bincmp.S
+++ b/runtime/interpreter/mterp/mips64/bincmp.S
@@ -6,27 +6,27 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     b${condition}c a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
index 1a2e22b..14d5fe0 100644
--- a/runtime/interpreter/mterp/mips64/footer.S
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -49,6 +49,7 @@
  *
  */
     .extern MterpHandleException
+    .extern MterpShouldSwitchInterpreters
 MterpException:
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -59,8 +60,11 @@
     REFRESH_IBASE
     daddu   rPC, a0, CODEITEM_INSNS_OFFSET
     dlsa    rPC, a1, rPC, 1                         # generate new dex_pc_ptr
-    sd      rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
@@ -81,10 +85,24 @@
     EXPORT_PC
     move    a0, rSELF
     jal     MterpSuspendCheck                       # (self)
+    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
     GET_INST_OPCODE v0                              # extract opcode from rINST
     GOTO_OPCODE v0                                  # jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST                               # rINST contains offset
+    jal     MterpLogOSR
+#endif
+    li      v0, 1                                   # Signal normal return
+    b       MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
     .extern MterpLogFallback
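
All of the mips64 branch fragments in this change share one new pattern: keep
the signed branch offset in rINST, let MterpProfileBranch decide whether the
loop is hot enough for on-stack replacement, and otherwise do the suspend
check only on backward branches. A C++-level sketch of that control flow
(MterpProfileBranch's signature is inferred from the argument setup in the
assembly; everything else is a placeholder):

    #include <cstdint>

    #define MTERP_PROFILE_BRANCHES 1  // as set in header.S

    class Thread;
    class ShadowFrame;
    // Real entry point referenced by the assembly; true requests OSR.
    extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* frame,
                                       int32_t offset);

    enum class BranchResult { kContinue, kOnStackReplacement, kSuspendCheck };

    BranchResult HandleBranch(Thread* self, ShadowFrame* frame,
                              const uint16_t** pc, int32_t offset) {
    #if MTERP_PROFILE_BRANCHES
      if (MterpProfileBranch(self, frame, offset)) {
        return BranchResult::kOnStackReplacement;  // resume in compiled code
      }
    #endif
      *pc += offset;  // dex branch offsets count 16-bit code units
      if (offset <= 0) {
        // Back edge (goto/32 may branch to itself, hence <=): poll suspend flags.
        return BranchResult::kSuspendCheck;
      }
      return BranchResult::kContinue;
    }
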
diff --git a/runtime/interpreter/mterp/mips64/header.S b/runtime/interpreter/mterp/mips64/header.S
index 4c3ca9e..dd0fbe0 100644
--- a/runtime/interpreter/mterp/mips64/header.S
+++ b/runtime/interpreter/mterp/mips64/header.S
@@ -82,14 +82,7 @@
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
-/*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
diff --git a/runtime/interpreter/mterp/mips64/invoke.S b/runtime/interpreter/mterp/mips64/invoke.S
index 4ae4fb1..be647b6 100644
--- a/runtime/interpreter/mterp/mips64/invoke.S
+++ b/runtime/interpreter/mterp/mips64/invoke.S
@@ -5,6 +5,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern $helper
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -13,5 +14,7 @@
     jal     $helper
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
diff --git a/runtime/interpreter/mterp/mips64/op_goto.S b/runtime/interpreter/mterp/mips64/op_goto.S
index f2df3e4..7c7d0ec 100644
--- a/runtime/interpreter/mterp/mips64/op_goto.S
+++ b/runtime/interpreter/mterp/mips64/op_goto.S
@@ -5,19 +5,21 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    srl     a0, rINST, 8
-    seb     a0, a0                      # a0 <- sign-extended AA
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a0, 1f                      # AA * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a0, MterpCheckSuspendAndContinue
+    .extern MterpProfileBranch
+    srl     rINST, rINST, 8
+    seb     rINST, rINST                # rINST <- offset (sign-extended AA)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_goto_16.S b/runtime/interpreter/mterp/mips64/op_goto_16.S
index cbf8cf2..566e3a7 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_16.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_16.S
@@ -5,18 +5,20 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    lh      a0, 2(rPC)                  # a0 <- sign-extended AAAA
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AAAA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a0, 1f                      # AA * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a0, MterpCheckSuspendAndContinue
+    .extern MterpProfileBranch
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_goto_32.S b/runtime/interpreter/mterp/mips64/op_goto_32.S
index 4a1feac..b260083 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_32.S
@@ -8,20 +8,22 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    lh      a0, 2(rPC)                  # a0 <- aaaa (low)
+    .extern MterpProfileBranch
+    lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
-    ins     a0, a1, 16, 16              # a0 = sign-extended AAAAaaaa
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AAAAAAAA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgtz    a0, 1f                      # AA * 2 > 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    blez    a0, MterpCheckSuspendAndContinue
+    ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/op_packed_switch.S b/runtime/interpreter/mterp/mips64/op_packed_switch.S
index cdbdf75..2c6eb2f 100644
--- a/runtime/interpreter/mterp/mips64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips64/op_packed_switch.S
@@ -10,6 +10,7 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern $func
+    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
@@ -17,15 +18,19 @@
     GET_VREG a1, a3                     # a1 <- vAA
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     $func                       # v0 <- code-unit branch offset
-    dlsa    rPC, v0, rPC, 1             # rPC <- rPC + offset * 2
-    FETCH_INST                          # load rINST
-#if MTERP_SUSPEND
-    bgtz    v0, 1f                      # offset * 2 > 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    blez    v0, MterpCheckSuspendAndContinue
+    move    rINST, v0
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/zcmp.S b/runtime/interpreter/mterp/mips64/zcmp.S
index d7ad894..0e0477f 100644
--- a/runtime/interpreter/mterp/mips64/zcmp.S
+++ b/runtime/interpreter/mterp/mips64/zcmp.S
@@ -6,25 +6,25 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     b${condition}zc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index ca727f4..10b19c5 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -147,16 +147,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const instrumentation::Instrumentation* const instrumentation =
       Runtime::Current()->GetInstrumentation();
-  bool unhandled_instrumentation;
-  // TODO: enable for other targets after more extensive testing.
-  if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm) ||
-      (kRuntimeISA == kX86_64) || (kRuntimeISA == kX86) ||
-      (kRuntimeISA == kMips)) {
-    unhandled_instrumentation = instrumentation->NonJitProfilingActive();
-  } else {
-    unhandled_instrumentation = instrumentation->IsActive();
-  }
-  return unhandled_instrumentation || Dbg::IsDebuggerActive();
+  return instrumentation->NonJitProfilingActive() || Dbg::IsDebuggerActive();
 }
 
 
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 7cef823..a17252b 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -89,14 +89,7 @@
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
-/*
- *
- * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
@@ -1107,20 +1100,22 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    srl     a0, rINST, 8
-    seb     a0, a0                      # a0 <- sign-extended AA
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a0, 1f                      # AA * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a0, MterpCheckSuspendAndContinue
+    .extern MterpProfileBranch
+    srl     rINST, rINST, 8
+    seb     rINST, rINST                # rINST <- offset (sign-extended AA)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1135,19 +1130,21 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    lh      a0, 2(rPC)                  # a0 <- sign-extended AAAA
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AAAA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a0, 1f                      # AA * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a0, MterpCheckSuspendAndContinue
+    .extern MterpProfileBranch
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1165,21 +1162,23 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    lh      a0, 2(rPC)                  # a0 <- aaaa (low)
+    .extern MterpProfileBranch
+    lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
-    ins     a0, a1, 16, 16              # a0 = sign-extended AAAAaaaa
-    dlsa    rPC, a0, rPC, 1             # rPC <- rPC + AAAAAAAA * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgtz    a0, 1f                      # AA * 2 > 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    blez    a0, MterpCheckSuspendAndContinue
+    ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1198,6 +1197,7 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern MterpDoPackedSwitch
+    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
@@ -1205,16 +1205,20 @@
     GET_VREG a1, a3                     # a1 <- vAA
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoPackedSwitch                       # v0 <- code-unit branch offset
-    dlsa    rPC, v0, rPC, 1             # rPC <- rPC + offset * 2
-    FETCH_INST                          # load rINST
-#if MTERP_SUSPEND
-    bgtz    v0, 1f                      # offset * 2 > 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    blez    v0, MterpCheckSuspendAndContinue
+    move    rINST, v0
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1234,6 +1238,7 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern MterpDoSparseSwitch
+    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
@@ -1241,16 +1246,20 @@
     GET_VREG a1, a3                     # a1 <- vAA
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoSparseSwitch                       # v0 <- code-unit branch offset
-    dlsa    rPC, v0, rPC, 1             # rPC <- rPC + offset * 2
-    FETCH_INST                          # load rINST
-#if MTERP_SUSPEND
-    bgtz    v0, 1f                      # offset * 2 > 0 => no suspend check
-    REFRESH_IBASE
-1:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    blez    v0, MterpCheckSuspendAndContinue
+    move    rINST, v0
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1438,28 +1447,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     beqc a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1477,28 +1486,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     bnec a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1516,28 +1525,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     bltc a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1555,28 +1564,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     bgec a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1594,28 +1603,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     bgtc a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1633,28 +1642,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended CCCC
+    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-
     blec a0, a1, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + CCCC * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # CCCC * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1672,26 +1681,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     beqzc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1709,26 +1718,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     bnezc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1746,26 +1755,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     bltzc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1783,26 +1792,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     bgezc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1820,26 +1829,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     bgtzc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1857,26 +1866,26 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    lh      a4, 2(rPC)                  # a4 <- sign-extended BBBB
+    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
+    lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-
     blezc a0, 1f
-    li      a4, 2                       # offset if branch not taken
+    li      rINST, 2                    # offset if branch not taken
 1:
-
-    dlsa    rPC, a4, rPC, 1             # rPC <- rPC + BBBB * 2
-    FETCH_INST                          # load rINST
-
-#if MTERP_SUSPEND
-    bgez    a4, 2f                      # BBBB * 2 >= 0 => no suspend check
-    REFRESH_IBASE
-2:
-#else
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    bltz    a4, MterpCheckSuspendAndContinue
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    jal     MterpProfileBranch          # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
 #endif
-
+    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
+    move    a0, rINST                   # a0 <- offset
+    FETCH_INST                          # load rINST
+    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -3166,6 +3175,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtual
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3174,6 +3184,8 @@
     jal     MterpInvokeVirtual
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3196,6 +3208,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuper
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3204,6 +3217,8 @@
     jal     MterpInvokeSuper
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3226,6 +3241,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirect
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3234,6 +3250,8 @@
     jal     MterpInvokeDirect
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3249,6 +3267,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStatic
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3257,6 +3276,8 @@
     jal     MterpInvokeStatic
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3272,6 +3293,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterface
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3280,6 +3302,8 @@
     jal     MterpInvokeInterface
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3316,6 +3340,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3324,6 +3349,8 @@
     jal     MterpInvokeVirtualRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3339,6 +3366,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuperRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3347,6 +3375,8 @@
     jal     MterpInvokeSuperRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3362,6 +3392,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirectRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3370,6 +3401,8 @@
     jal     MterpInvokeDirectRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3385,6 +3418,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStaticRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3393,6 +3427,8 @@
     jal     MterpInvokeStaticRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -3408,6 +3444,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterfaceRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -3416,6 +3453,8 @@
     jal     MterpInvokeInterfaceRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -6962,6 +7001,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuick
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -6970,6 +7010,8 @@
     jal     MterpInvokeVirtualQuick
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
 
@@ -6985,6 +7027,7 @@
     /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
     /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuickRange
+    .extern MterpShouldSwitchInterpreters
     EXPORT_PC
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -6993,6 +7036,8 @@
     jal     MterpInvokeVirtualQuickRange
     beqzc   v0, MterpException
     FETCH_ADVANCE_INST 3
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
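
Every invoke handler gains the same tail: once the invoke helper succeeds and the PC is advanced, mterp asks whether the environment changed while the callee ran (a debugger attached, profiling started) and falls back if so. A minimal C-level sketch using the names from the assembly:

    // Stub so the sketch compiles; the real predicate is the mterp.cc helper.
    static bool MterpShouldSwitchInterpreters() { return false; }

    static bool FinishInvoke(bool invoke_ok) {
      if (!invoke_ok) {
        return false;                      // beqzc v0, MterpException
      }
      // FETCH_ADVANCE_INST 3 happens here in the assembly.
      if (MterpShouldSwitchInterpreters()) {
        return false;                      // bnezc v0, MterpFallback
      }
      return true;                         // GET_INST_OPCODE / GOTO_OPCODE
    }
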
 
@@ -12256,6 +12301,7 @@
  *
  */
     .extern MterpHandleException
+    .extern MterpShouldSwitchInterpreters
 MterpException:
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
@@ -12266,8 +12312,11 @@
     REFRESH_IBASE
     daddu   rPC, a0, CODEITEM_INSNS_OFFSET
     dlsa    rPC, a1, rPC, 1                         # generate new dex_pc_ptr
-    sd      rPC, OFF_FP_DEX_PC_PTR(rFP)
+    /* Do we need to switch interpreters? */
+    jal     MterpShouldSwitchInterpreters
+    bnezc   v0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE v0
     GOTO_OPCODE v0
@@ -12288,10 +12337,24 @@
     EXPORT_PC
     move    a0, rSELF
     jal     MterpSuspendCheck                       # (self)
+    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
     GET_INST_OPCODE v0                              # extract opcode from rINST
     GOTO_OPCODE v0                                  # jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST                               # rINST contains offset
+    jal     MterpLogOSR
+#endif
+    li      v0, 1                                   # Signal normal return
+    b       MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
     .extern MterpLogFallback
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 1545cb7..0b0f926 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -169,12 +169,16 @@
   return false;
 }
 
-class ScopedCodeCacheWrite {
+class ScopedCodeCacheWrite : ScopedTrace {
  public:
-  explicit ScopedCodeCacheWrite(MemMap* code_map) : code_map_(code_map) {
+  explicit ScopedCodeCacheWrite(MemMap* code_map)
+      : ScopedTrace("ScopedCodeCacheWrite"),
+        code_map_(code_map) {
+    ScopedTrace trace("mprotect all");
     CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtAll);
   }
   ~ScopedCodeCacheWrite() {
+    ScopedTrace trace("mprotect code");
     CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   }
  private:
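
Deriving from ScopedTrace makes the whole writable window a named trace event, with the two mprotect transitions traced separately in the constructor and destructor. A compilable model of the pattern, using a logging stand-in for ART's ScopedTrace:

    #include <cstdio>

    // Stand-in for ART's systrace helper: begins a named section on
    // construction and ends it on destruction.
    class ScopedTrace {
     public:
      explicit ScopedTrace(const char* name) : name_(name) {
        std::printf("begin %s\n", name_);
      }
      ~ScopedTrace() { std::printf("end %s\n", name_); }
     private:
      const char* name_;
    };

    class ScopedCodeCacheWrite : ScopedTrace {
     public:
      ScopedCodeCacheWrite() : ScopedTrace("ScopedCodeCacheWrite") {
        ScopedTrace trace("mprotect all");   // kProtAll in the real code
      }
      ~ScopedCodeCacheWrite() {
        ScopedTrace trace("mprotect code");  // back to kProtCode
      }
    };

    int main() {
      ScopedCodeCacheWrite write_scope;  // nested begin/end pairs in order
      return 0;
    }
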
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 103a8b7..19584ed 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -29,7 +29,6 @@
 #include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
-#include "jit/profiling_info.h"
 #include "iftable.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
@@ -940,12 +939,6 @@
   }
   for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots(visitor, pointer_size);
-    if (method.GetDeclaringClassUnchecked() != nullptr && !method.IsNative()) {
-      ProfilingInfo* profiling_info = method.GetProfilingInfo(pointer_size);
-      if (profiling_info != nullptr) {
-        profiling_info->VisitRoots(visitor);
-      }
-    }
   }
 }
 
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 6ffd476..858849f 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -26,6 +26,7 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
+#include <atomic>
 
 namespace art {
 
@@ -473,6 +474,18 @@
   obj->SetField64<false>(MemberOffset(offset), conv.converted);
 }
 
+static void Unsafe_loadFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_acquire);
+}
+
+static void Unsafe_storeFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_release);
+}
+
+static void Unsafe_fullFence(JNIEnv*, jobject) {
+  std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -532,6 +545,11 @@
   OVERLOADED_NATIVE_METHOD(Unsafe, putLong, "!(JJ)V", putLongJJ),
   OVERLOADED_NATIVE_METHOD(Unsafe, putFloat, "!(JF)V", putFloatJF),
   OVERLOADED_NATIVE_METHOD(Unsafe, putDouble, "!(JD)V", putDoubleJD),
+
+  // Fences
+  NATIVE_METHOD(Unsafe, loadFence, "!()V"),
+  NATIVE_METHOD(Unsafe, storeFence, "!()V"),
+  NATIVE_METHOD(Unsafe, fullFence, "!()V"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
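
The three natives are thin wrappers over C++11 fences, so their semantics are exactly those of std::atomic_thread_fence: acquire for loadFence, release for storeFence, sequentially consistent for fullFence. A standalone sketch of the classic publish/consume pattern these fences enable:

    #include <atomic>
    #include <cstdio>
    #include <thread>

    int data = 0;
    std::atomic<bool> ready{false};

    void Publisher() {
      data = 42;                                             // plain write
      std::atomic_thread_fence(std::memory_order_release);   // storeFence()
      ready.store(true, std::memory_order_relaxed);
    }

    void Consumer() {
      while (!ready.load(std::memory_order_relaxed)) {}
      std::atomic_thread_fence(std::memory_order_acquire);   // loadFence()
      std::printf("%d\n", data);                             // prints 42
    }

    int main() {
      std::thread t(Publisher);
      Consumer();
      t.join();
      return 0;
    }
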
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 7e84b40..0e12d73 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -101,6 +101,17 @@
   kIntrinsicCas,
   kIntrinsicUnsafeGet,
   kIntrinsicUnsafePut,
+
+  // 1.8.
+  kIntrinsicUnsafeGetAndAddInt,
+  kIntrinsicUnsafeGetAndAddLong,
+  kIntrinsicUnsafeGetAndSetInt,
+  kIntrinsicUnsafeGetAndSetLong,
+  kIntrinsicUnsafeGetAndSetObject,
+  kIntrinsicUnsafeLoadFence,
+  kIntrinsicUnsafeStoreFence,
+  kIntrinsicUnsafeFullFence,
+
   kIntrinsicSystemArrayCopyCharArray,
   kIntrinsicSystemArrayCopy,
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index cf515b6..4c81d4f 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -171,9 +171,9 @@
   closedir(d);
 }
 
-// Dump checkpoint timeout in milliseconds. Larger amount on the host, as dumping will invoke
-// addr2line when available.
-static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 10000 : 20000;
+// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
+// overloaded with ANR dumps.
+static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;
 
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 30f613c..b171b75 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -17,6 +17,7 @@
 #include "reg_type_cache-inl.h"
 
 #include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/casts.h"
 #include "base/scoped_arena_allocator.h"
 #include "base/stl_util.h"
@@ -351,9 +352,11 @@
     types.Copy(&left_merge->GetUnresolvedTypes());
     left_resolved = &left_merge->GetResolvedPart();
   } else if (left.IsUnresolvedTypes()) {
+    types.ClearAllBits();
     types.SetBit(left.GetId());
     left_resolved = &Zero();
   } else {
+    types.ClearAllBits();
     left_resolved = &left;
   }
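
The two ClearAllBits() calls fix a latent bug: of the three branches, only the Copy() path fully initializes the arena-backed bit vector, so the other two could inherit stale bits from recycled arena memory. A small model of the hazard (simplified types, not ART's ArenaBitVector):

    #include <cstdint>
    #include <cstring>

    // Model arena: hands back recycled, uncleared storage.
    static uint32_t recycled_words[4] = {0xdeadbeefu, 0u, 0u, 0u};

    struct BitVector {
      uint32_t* words = recycled_words;  // the "arena" did not zero this
      void ClearAllBits() { std::memset(words, 0, sizeof(recycled_words)); }
      void SetBit(uint32_t id) { words[id / 32] |= 1u << (id % 32); }
      bool IsBitSet(uint32_t id) const {
        return ((words[id / 32] >> (id % 32)) & 1u) != 0;
      }
    };

    int main() {
      BitVector types;
      types.ClearAllBits();  // the added call; without it, stale bits from
                             // 0xdeadbeef would read as unresolved type ids
      types.SetBit(3);
      return types.IsBitSet(0) ? 1 : 0;  // 0: only bit 3 is set
    }
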
 
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 22ac7e4..42a74f8 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -30,23 +30,14 @@
 namespace art {
 namespace verifier {
 
-class BaseRegTypeTest : public CommonRuntimeTest {
- public:
-  void PostRuntimeCreate() OVERRIDE {
-    stack.reset(new ArenaStack(Runtime::Current()->GetArenaPool()));
-    allocator.reset(new ScopedArenaAllocator(stack.get()));
-  }
-
-  std::unique_ptr<ArenaStack> stack;
-  std::unique_ptr<ScopedArenaAllocator> allocator;
-};
-
-class RegTypeTest : public BaseRegTypeTest {};
+class RegTypeTest : public CommonRuntimeTest {};
 
 TEST_F(RegTypeTest, ConstLoHi) {
   // Tests creating primitive types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_const_0 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_1 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_2 = cache.FromCat1Const(30, true);
@@ -67,8 +58,10 @@
 }
 
 TEST_F(RegTypeTest, Pairs) {
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   int64_t val = static_cast<int32_t>(1234);
   const RegType& precise_lo = cache.FromCat2ConstLo(static_cast<int32_t>(val), true);
   const RegType& precise_hi = cache.FromCat2ConstHi(static_cast<int32_t>(val >> 32), true);
@@ -91,8 +84,10 @@
 }
 
 TEST_F(RegTypeTest, Primitives) {
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
 
   const RegType& bool_reg_type = cache.Boolean();
   EXPECT_FALSE(bool_reg_type.IsUndefined());
@@ -359,13 +354,15 @@
   EXPECT_TRUE(double_reg_type.HasClass());
 }
 
-class RegTypeReferenceTest : public BaseRegTypeTest {};
+class RegTypeReferenceTest : public CommonRuntimeTest {};
 
 TEST_F(RegTypeReferenceTest, JavalangObjectImprecise) {
   // Tests matching precisions. A reference type that was created precise doesn't
   // match the one that is imprecise.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& imprecise_obj = cache.JavaLangObject(false);
   const RegType& precise_obj = cache.JavaLangObject(true);
   const RegType& precise_obj_2 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -379,8 +376,10 @@
 TEST_F(RegTypeReferenceTest, UnresolvedType) {
   // Tests creating unresolved types: a miss the first time the cache is
   // asked, and a hit the second time.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   EXPECT_TRUE(ref_type_0.IsNonZeroReferenceTypes());
@@ -395,8 +394,10 @@
 
 TEST_F(RegTypeReferenceTest, UnresolvedUnintializedType) {
   // Tests creating uninitialized types from unresolved types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   const RegType& ref_type = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
@@ -417,8 +418,10 @@
 
 TEST_F(RegTypeReferenceTest, Dump) {
   // Tests types for proper Dump messages.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& unresolved_ref = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   const RegType& unresolved_ref_another = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExistEither;", true);
   const RegType& resolved_ref = cache.JavaLangString();
@@ -442,8 +445,10 @@
   // Add a class to the cache, then look for the same class and make sure it
   // is a hit the second time. Then check for the same effect when using the
   // JavaLangString method instead of FromDescriptor. The String class is final.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type = cache.JavaLangString();
   const RegType& ref_type_2 = cache.JavaLangString();
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/String;", true);
@@ -462,8 +467,10 @@
   // Add a class to the cache, then look for the same class and make sure it
   // is a hit the second time. Then check for the same effect when using the
   // JavaLangObject method instead of FromDescriptor. The Object class is not final.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true, *allocator);
+  RegTypeCache cache(true, allocator);
   const RegType& ref_type = cache.JavaLangObject(true);
   const RegType& ref_type_2 = cache.JavaLangObject(true);
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -476,7 +483,9 @@
   // Tests merging logic.
   // String and Object: the LUB is Object.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
+  RegTypeCache cache_new(true, allocator);
   const RegType& string = cache_new.JavaLangString();
   const RegType& Object = cache_new.JavaLangObject(true);
   EXPECT_TRUE(string.Merge(Object, &cache_new).IsJavaLangObject());
@@ -498,8 +507,10 @@
 
 TEST_F(RegTypeTest, MergingFloat) {
   // Testing merging logic with float and float constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& float_type = cache_new.Float();
@@ -529,8 +540,10 @@
 
 TEST_F(RegTypeTest, MergingLong) {
   // Testing merging logic with long and long constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& long_lo_type = cache_new.LongLo();
@@ -583,8 +596,10 @@
 
 TEST_F(RegTypeTest, MergingDouble) {
   // Testing merging logic with double and double constants.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& double_lo_type = cache_new.DoubleLo();
@@ -637,8 +652,10 @@
 
 TEST_F(RegTypeTest, ConstPrecision) {
   // Tests creating primitive types.
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true, *allocator);
+  RegTypeCache cache_new(true, allocator);
   const RegType& imprecise_const = cache_new.FromCat1Const(10, false);
   const RegType& precise_const = cache_new.FromCat1Const(10, true);
 
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index a9a7a05..b2f905e 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -40,7 +40,7 @@
   }
 
   private static Unsafe getUnsafe() throws Exception {
-    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Class<?> unsafeClass = Unsafe.class;
     Field f = unsafeClass.getDeclaredField("theUnsafe");
     f.setAccessible(true);
     return (Unsafe) f.get(null);
diff --git a/test/004-checker-UnsafeTest18/expected.txt b/test/004-checker-UnsafeTest18/expected.txt
new file mode 100644
index 0000000..651da72
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/expected.txt
@@ -0,0 +1,2 @@
+starting
+passed
diff --git a/test/004-checker-UnsafeTest18/info.txt b/test/004-checker-UnsafeTest18/info.txt
new file mode 100644
index 0000000..0fca5eb
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/info.txt
@@ -0,0 +1 @@
+Test support for 1.8 sun.misc.Unsafe.
diff --git a/test/004-checker-UnsafeTest18/src/Main.java b/test/004-checker-UnsafeTest18/src/Main.java
new file mode 100644
index 0000000..bb6de2e
--- /dev/null
+++ b/test/004-checker-UnsafeTest18/src/Main.java
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+
+import sun.misc.Unsafe;
+
+/**
+ * Checker test for the 1.8 Unsafe operations. Note that this is by no means an
+ * exhaustive unit test for these CAS (compare-and-swap) and fence operations.
+ * Instead, this test ensures the methods are recognized as intrinsic and behave
+ * as expected.
+ */
+public class Main {
+
+  private static final Unsafe unsafe = getUnsafe();
+
+  private static Thread[] sThreads = new Thread[10];
+
+  //
+  // Fields accessed by setters and adders.
+  //
+
+  public int i = 0;
+  public long l = 0;
+  public Object o = null;
+
+  //
+  // Setters.
+  //
+
+  /// CHECK-START: int Main.set32(java.lang.Object, long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetInt
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int set32(Object o, long offset, int newValue) {
+    return unsafe.getAndSetInt(o, offset, newValue);
+  }
+
+  /// CHECK-START: long Main.set64(java.lang.Object, long, long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetLong
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long set64(Object o, long offset, long newValue) {
+    return unsafe.getAndSetLong(o, offset, newValue);
+  }
+
+  /// CHECK-START: java.lang.Object Main.setObj(java.lang.Object, long, java.lang.Object) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:l\d+>> InvokeVirtual intrinsic:UnsafeGetAndSetObject
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static Object setObj(Object o, long offset, Object newValue) {
+    return unsafe.getAndSetObject(o, offset, newValue);
+  }
+
+  //
+  // Adders.
+  //
+
+  /// CHECK-START: int Main.add32(java.lang.Object, long, int) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeVirtual intrinsic:UnsafeGetAndAddInt
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int add32(Object o, long offset, int delta) {
+    return unsafe.getAndAddInt(o, offset, delta);
+  }
+
+  /// CHECK-START: long Main.add64(java.lang.Object, long, long) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeVirtual intrinsic:UnsafeGetAndAddLong
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long add64(Object o, long offset, long delta) {
+    return unsafe.getAndAddLong(o, offset, delta);
+  }
+
+  //
+  // Fences (native).
+  //
+
+  /// CHECK-START: void Main.load() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeLoadFence
+  private static void load() {
+    unsafe.loadFence();
+  }
+
+  /// CHECK-START: void Main.store() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeStoreFence
+  private static void store() {
+    unsafe.storeFence();
+  }
+
+  /// CHECK-START: void Main.full() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeFullFence
+  private static void full() {
+    unsafe.fullFence();
+  }
+
+  //
+  // Thread fork/join.
+  //
+
+  private static void fork(Runnable r) {
+    for (int i = 0; i < 10; i++) {
+      sThreads[i] = new Thread(r);
+      sThreads[i].start();
+    }
+  }
+
+  private static void join() {
+    try {
+      for (int i = 0; i < 10; i++) {
+        sThreads[i].join();
+      }
+    } catch (InterruptedException e) {
+      throw new Error("Failed join: " + e);
+    }
+  }
+
+  //
+  // Driver.
+  //
+
+  public static void main(String[] args) {
+    System.out.println("starting");
+
+    final Main m = new Main();
+
+    // Get the offsets.
+
+    final long intOffset, longOffset, objOffset;
+    try {
+      Field intField = Main.class.getDeclaredField("i");
+      Field longField = Main.class.getDeclaredField("l");
+      Field objField = Main.class.getDeclaredField("o");
+
+      intOffset = unsafe.objectFieldOffset(intField);
+      longOffset = unsafe.objectFieldOffset(longField);
+      objOffset = unsafe.objectFieldOffset(objField);
+
+    } catch (NoSuchFieldException e) {
+      throw new Error("No offset: " + e);
+    }
+
+    // Sanity checks within a single thread.
+
+    set32(m, intOffset, 3);
+    expectEquals32(3, m.i);
+
+    set64(m, longOffset, 7L);
+    expectEquals64(7L, m.l);
+
+    setObj(m, objOffset, m);
+    expectEqualsObj(m, m.o);
+
+    add32(m, intOffset, 11);
+    expectEquals32(14, m.i);
+
+    add64(m, longOffset, 13L);
+    expectEquals64(20L, m.l);
+
+    // Sanity checks on the setters across multiple threads.
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          set32(m, intOffset, i);
+      }
+    });
+    join();
+    expectEquals32(9, m.i);  // deterministic: every thread writes 9 last
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          set64(m, longOffset, (long) (100 + i));
+      }
+    });
+    join();
+    expectEquals64(109L, m.l);  // deterministic: every thread writes 109 last
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          setObj(m, objOffset, sThreads[i]);
+      }
+    });
+    join();
+    expectEqualsObj(sThreads[9], m.o);  // deterministic: every thread writes sThreads[9] last
+
+    // Sanity checks on the adders across multiple threads.
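+    // Unlike the setters, each getAndAdd is an atomic read-modify-write,
+    // so every delta must show up in the final sum.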
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          add32(m, intOffset, i + 1);
+      }
+    });
+    join();
+    expectEquals32(559, m.i);  // all adds accounted for: 9 + 10 * (1+...+10) = 559
+
+    fork(new Runnable() {
+      public void run() {
+        for (int i = 0; i < 10; i++)
+          add64(m, longOffset, (long) (i + 1));
+      }
+    });
+    join();
+    expectEquals64(659L, m.l);  // all adds accounted for: 109 + 10 * 55 = 659
+
+    // TODO: the fences
+
+    System.out.println("passed");
+  }
+
+  // Use reflection to implement "Unsafe.getUnsafe()".
+  private static Unsafe getUnsafe() {
+    try {
+      Class<?> unsafeClass = Unsafe.class;
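+      // The singleton is stored in Unsafe's private static field "theUnsafe".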
+      Field f = unsafeClass.getDeclaredField("theUnsafe");
+      f.setAccessible(true);
+      return (Unsafe) f.get(null);
+    } catch (Exception e) {
+      throw new Error("Cannot get Unsafe instance");
+    }
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsObj(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/462-checker-inlining-across-dex-files/multidex.jpp b/test/462-checker-inlining-across-dex-files/multidex.jpp
new file mode 100644
index 0000000..ae55456
--- /dev/null
+++ b/test/462-checker-inlining-across-dex-files/multidex.jpp
@@ -0,0 +1,8 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+
+AAA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class AAA
+
diff --git a/test/556-invoke-super/multidex.jpp b/test/556-invoke-super/multidex.jpp
new file mode 100644
index 0000000..fe01801
--- /dev/null
+++ b/test/556-invoke-super/multidex.jpp
@@ -0,0 +1,4 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main*
+
diff --git a/test/569-checker-pattern-replacement/multidex.jpp b/test/569-checker-pattern-replacement/multidex.jpp
new file mode 100644
index 0000000..cfc8ad1
--- /dev/null
+++ b/test/569-checker-pattern-replacement/multidex.jpp
@@ -0,0 +1,8 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+
+BaseInMainDex:
+  @@com.android.jack.annotations.ForceInMainDex
+  class BaseInMainDex
+
diff --git a/test/etc/default-build b/test/etc/default-build
index 6e855ec..5f78496 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -116,28 +116,33 @@
   SKIP_DX_MERGER="true"
 fi
 
-if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
-  # Jack does not support this configuration unless we specify how to partition the DEX file
-  # with a .jpp file.
-  USE_JACK="false"
-fi
-
 if [ ${USE_JACK} = "true" ]; then
   # Jack toolchain
   if [ "${HAS_SRC}" = "true" ]; then
-    ${JACK} ${JACK_ARGS} --output-jack src.jack src
-    imported_jack_files="--import src.jack"
+    if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+      # Compile src and src-multidex in the same .jack file. We will apply multidex partitioning
+      # when creating the output .dex file.
+      ${JACK} ${JACK_ARGS} --output-jack src.jack src src-multidex
+      jack_extra_args="${jack_extra_args} -D jack.dex.output.policy=minimal-multidex"
+      jack_extra_args="${jack_extra_args} -D jack.preprocessor=true"
+      jack_extra_args="${jack_extra_args} -D jack.preprocessor.file=multidex.jpp"
+    else
+      ${JACK} ${JACK_ARGS} --output-jack src.jack src
+    fi
+    jack_extra_args="${jack_extra_args} --import src.jack"
   fi
 
   if [ "${HAS_SRC2}" = "true" ]; then
     ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
-    imported_jack_files="--import src2.jack ${imported_jack_files}"
+    # In case of duplicate classes, we want the definitions from src2 to win. Therefore we
+    # apply the 'keep-first' policy and import the src2.jack file *before* the src.jack file.
+    jack_extra_args="${jack_extra_args} -D jack.import.type.policy=keep-first"
+    jack_extra_args="--import src2.jack ${jack_extra_args}"
   fi
 
-  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
-  # class definitions from src2 first.
+  # Compile jack files into a DEX file.
   if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
-    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+    ${JACK} ${JACK_ARGS} ${jack_extra_args} --output-dex .
   fi
 else
   # Legacy toolchain with javac+dx
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index fab4599..46100ae 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -270,10 +270,5 @@
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
-},
-{
-  description: "Temporary suppressing while test is fixed",
-  result: EXEC_FAILED,
-  names: [ "org.apache.harmony.tests.java.util.ArrayDequeTest#test_forEachRemaining_iterator" ]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 19a61dc..95f0c2d 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -16,20 +16,5 @@
   names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
           "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
   bug: 25883050
-},
-{
-  description: "libcore.java.lang.OldSystemTest#test_gc failure on armv8-concurrent-collector.",
-  result: EXEC_FAILED,
-  names: ["libcore.java.lang.OldSystemTest#test_gc"],
-  bug: 26155567
-},
-{
-  description: "TimeoutException on hammerhead-concurrent-collector",
-  result: EXEC_FAILED,
-  modes: [device],
-  names: ["libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
-          "libcore.java.text.SimpleDateFormatTest#testLocales",
-          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries"],
-  bug: 26711853
 }
 ]
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 9e085b5..1e9c763 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -46,5 +46,13 @@
 adb logcat -p
 
 echo -e "${green}Kill stalled dalvikvm processes${nc}"
-processes=$(adb shell "ps" | grep dalvikvm | awk '{print $2}')
-for i in $processes; do adb shell kill -9 $i; done
+# 'ps' can sometimes hang on Android M.
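+# timeout(1) exits with status 124 when the command had to be killed.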
+timeout 2s adb shell "ps"
+if [ $? = 124 ]; then
+  echo -e "${green}Rebooting device to fix 'ps'${nc}"
+  adb reboot
+  adb wait-for-device root
+else
+  processes=$(adb shell "ps" | grep dalvikvm | awk '{print $2}')
+  for i in $processes; do adb shell kill -9 $i; done
+fi