Merge "Revert "Revert "Add profman tool: responsible to process profiles"""
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index dc53853..02bce41 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,8 +118,7 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-# b/25928358, illegal instruction on mips64r6 with -O0
-ART_TARGET_CLANG_mips64 := false
+ART_TARGET_CLANG_mips64 :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index ab70367..c9af1c6 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -205,7 +205,7 @@
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
     $(5) := $$(LOCAL_INSTALLED_MODULE)
@@ -221,7 +221,7 @@
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
     $(6) := $$(LOCAL_INSTALLED_MODULE)
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index dc2c9c9..81b854e 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -291,6 +291,13 @@
   }
 
   {
+    const char* log_args = "-verbose:collector";
+    LogVerbosity log_verbosity = LogVerbosity();
+    log_verbosity.collector = true;
+    EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose);
+  }
+
+  {
     const char* log_args = "-verbose:oat";
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.oat = true;
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 740199d..c0a00cc 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -584,6 +584,8 @@
     for (size_t j = 0; j < verbose_options.size(); ++j) {
       if (verbose_options[j] == "class") {
         log_verbosity.class_linker = true;
+      } else if (verbose_options[j] == "collector") {
+        log_verbosity.collector = true;
       } else if (verbose_options[j] == "compiler") {
         log_verbosity.compiler = true;
       } else if (verbose_options[j] == "deopt") {
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 3dfb4b1..3f61e8e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -90,7 +90,6 @@
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
-	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
@@ -181,6 +180,7 @@
 	linker/x86/relative_patcher_x86_base.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/intrinsics_x86.cc \
+	optimizing/pc_relative_fixups_x86.cc \
 	utils/x86/assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index ac0f4ca..d3859ca 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -24,7 +24,6 @@
 #include "debug/dwarf/headers.h"
 #include "debug/elf_compilation_unit.h"
 #include "dex_file-inl.h"
-#include "dex_file.h"
 #include "elf_builder.h"
 #include "stack_map.h"
 
@@ -90,8 +89,9 @@
         continue;
       }
 
-      ArrayRef<const SrcMapElem> src_mapping_table;
-      std::vector<SrcMapElem> src_mapping_table_from_stack_maps;
+      uint32_t prologue_end = std::numeric_limits<uint32_t>::max();
+      ArrayRef<const SrcMapElem> pc2dex_map;
+      std::vector<SrcMapElem> pc2dex_map_from_stack_maps;
       if (mi->IsFromOptimizingCompiler()) {
         // Use stack maps to create mapping table from pc to dex.
         const CodeInfo code_info(mi->compiled_method->GetVmapTable().data());
@@ -99,35 +99,36 @@
         for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
           StackMap stack_map = code_info.GetStackMapAt(s, encoding);
           DCHECK(stack_map.IsValid());
-          // Emit only locations where we have local-variable information.
-          // In particular, skip mappings inside the prologue.
+          const uint32_t pc = stack_map.GetNativePcOffset(encoding);
+          const int32_t dex = stack_map.GetDexPc(encoding);
+          pc2dex_map_from_stack_maps.push_back({pc, dex});
           if (stack_map.HasDexRegisterMap(encoding)) {
-            const uint32_t pc = stack_map.GetNativePcOffset(encoding);
-            const int32_t dex = stack_map.GetDexPc(encoding);
-            src_mapping_table_from_stack_maps.push_back({pc, dex});
+            // Guess that the first map with local variables is the end of prologue.
+            prologue_end = std::min(prologue_end, pc);
           }
         }
-        std::sort(src_mapping_table_from_stack_maps.begin(),
-                  src_mapping_table_from_stack_maps.end());
-        src_mapping_table = ArrayRef<const SrcMapElem>(src_mapping_table_from_stack_maps);
+        std::sort(pc2dex_map_from_stack_maps.begin(),
+                  pc2dex_map_from_stack_maps.end());
+        pc2dex_map = ArrayRef<const SrcMapElem>(pc2dex_map_from_stack_maps);
       } else {
         // Use the mapping table provided by the quick compiler.
-        src_mapping_table = mi->compiled_method->GetSrcMappingTable();
+        pc2dex_map = mi->compiled_method->GetSrcMappingTable();
+        prologue_end = 0;
       }
 
-      if (src_mapping_table.empty()) {
+      if (pc2dex_map.empty()) {
         continue;
       }
 
       Elf_Addr method_address = text_address + mi->low_pc;
 
-      PositionInfos position_infos;
+      PositionInfos dex2line_map;
       const DexFile* dex = mi->dex_file;
-      if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &position_infos)) {
+      if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &dex2line_map)) {
         continue;
       }
 
-      if (position_infos.empty()) {
+      if (dex2line_map.empty()) {
         continue;
       }
 
@@ -184,21 +185,25 @@
       // Generate mapping opcodes from PC to Java lines.
       if (file_index != 0) {
         bool first = true;
-        for (SrcMapElem pc2dex : src_mapping_table) {
+        for (SrcMapElem pc2dex : pc2dex_map) {
           uint32_t pc = pc2dex.from_;
           int dex_pc = pc2dex.to_;
           // Find the mapping whose address is greater than our dex pc; then go back one step.
-          auto ub = std::upper_bound(position_infos.begin(), position_infos.end(), dex_pc,
+          auto dex2line = std::upper_bound(
+              dex2line_map.begin(),
+              dex2line_map.end(),
+              dex_pc,
               [](uint32_t address, const DexFile::PositionInfo& entry) {
                   return address < entry.address_;
               });
-          if (ub != position_infos.begin()) {
-            int line = (--ub)->line_;
+          // Look for first valid mapping after the prologue.
+          if (dex2line != dex2line_map.begin() && pc >= prologue_end) {
+            int line = (--dex2line)->line_;
             if (first) {
               first = false;
               if (pc > 0) {
                 // Assume that any preceding code is prologue.
-                int first_line = position_infos.front().line_;
+                int first_line = dex2line_map.front().line_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
                 opcodes.AddRow(method_address, first_line);
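
[Editor's sketch] The hunk above inverts the old filtering: every stack map now lands in the pc-to-dex table, the first pc carrying dex-register info is recorded as a guessed prologue end, and entries before that pc are skipped only when line rows are emitted. A minimal standalone sketch of that emission step, with hypothetical PcDex/LineEntry types standing in for ART's SrcMapElem and DexFile::PositionInfo:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct PcDex { uint32_t pc; uint32_t dex; };       // Sorted by pc.
    struct LineEntry { uint32_t address; int line; };  // Sorted by dex address.

    // Emit (pc, line) rows, skipping mappings inside the guessed prologue.
    void EmitLineRows(const std::vector<PcDex>& pc2dex_map,
                      const std::vector<LineEntry>& dex2line_map,
                      uint32_t prologue_end) {
      for (const PcDex& entry : pc2dex_map) {
        // First line entry whose address is greater than our dex pc; then go back one step.
        auto it = std::upper_bound(
            dex2line_map.begin(), dex2line_map.end(), entry.dex,
            [](uint32_t address, const LineEntry& e) { return address < e.address; });
        if (it != dex2line_map.begin() && entry.pc >= prologue_end) {
          std::printf("pc=0x%x -> line %d\n", entry.pc, (--it)->line);
        }
      }
    }

Keeping the full table and filtering late is what lets the quick-compiler path below simply set prologue_end to 0 and reuse the same loop.
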
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index a19b36f..8fd20aa 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
 
+#include <cstring>
 #include <map>
 
 #include "arch/instruction_set.h"
@@ -172,11 +173,6 @@
     return;
   }
 
-  dwarf::Writer<> debug_loc(debug_loc_buffer);
-  dwarf::Writer<> debug_ranges(debug_ranges_buffer);
-  debug_info->WriteSecOffset(dwarf::DW_AT_location, debug_loc.size());
-  debug_info->WriteSecOffset(dwarf::DW_AT_start_scope, debug_ranges.size());
-
   std::vector<VariableLocation> variable_locations = GetVariableLocations(
       method_info,
       vreg,
@@ -185,6 +181,8 @@
       dex_pc_high);
 
   // Write .debug_loc entries.
+  dwarf::Writer<> debug_loc(debug_loc_buffer);
+  const size_t debug_loc_offset = debug_loc.size();
   const bool is64bit = Is64BitInstructionSet(isa);
   std::vector<uint8_t> expr_buffer;
   for (const VariableLocation& variable_location : variable_locations) {
@@ -271,6 +269,8 @@
 
   // Write .debug_ranges entries.
   // This includes ranges where the variable is in scope but the location is not known.
+  dwarf::Writer<> debug_ranges(debug_ranges_buffer);
+  size_t debug_ranges_offset = debug_ranges.size();
   for (size_t i = 0; i < variable_locations.size(); i++) {
     uint32_t low_pc = variable_locations[i].low_pc;
     uint32_t high_pc = variable_locations[i].high_pc;
@@ -294,6 +294,23 @@
     debug_ranges.PushUint32(0);
     debug_ranges.PushUint32(0);
   }
+
+  // Simple de-duplication - check whether this entry is the same as the last one (or a tail of it).
+  size_t debug_ranges_entry_size = debug_ranges.size() - debug_ranges_offset;
+  if (debug_ranges_offset >= debug_ranges_entry_size) {
+    size_t previous_offset = debug_ranges_offset - debug_ranges_entry_size;
+    if (memcmp(debug_ranges_buffer->data() + previous_offset,
+               debug_ranges_buffer->data() + debug_ranges_offset,
+               debug_ranges_entry_size) == 0) {
+      // Remove what we have just written and use the last entry instead.
+      debug_ranges_buffer->resize(debug_ranges_offset);
+      debug_ranges_offset = previous_offset;
+    }
+  }
+
+  // Write attributes to .debug_info.
+  debug_info->WriteSecOffset(dwarf::DW_AT_location, debug_loc_offset);
+  debug_info->WriteSecOffset(dwarf::DW_AT_start_scope, debug_ranges_offset);
 }
 
 }  // namespace debug
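
[Editor's sketch] The de-duplication added above depends only on buffer layout, so it can be shown in isolation: after appending an entry, compare it byte-for-byte with the bytes immediately preceding it, and if they match, drop the new copy and reference the old one. A self-contained sketch using a plain byte vector and a hypothetical AppendDeduplicated helper (ART goes through dwarf::Writer instead):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Append 'entry' to 'buffer' and return the offset the caller should reference.
    // If the appended bytes exactly match the bytes just before them, reuse those.
    size_t AppendDeduplicated(std::vector<uint8_t>* buffer,
                              const std::vector<uint8_t>& entry) {
      size_t offset = buffer->size();
      buffer->insert(buffer->end(), entry.begin(), entry.end());
      size_t entry_size = buffer->size() - offset;
      if (offset >= entry_size &&
          std::memcmp(buffer->data() + offset - entry_size,
                      buffer->data() + offset,
                      entry_size) == 0) {
        buffer->resize(offset);  // Remove what we just wrote...
        offset -= entry_size;    // ...and point at the identical previous entry.
      }
      return offset;
    }

This only catches back-to-back duplicates, which is cheap and, for range lists emitted variable by variable, covers the common repeated case.
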
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index eb4915b..6f9dd6d 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -1679,9 +1679,7 @@
       if (opcode == Instruction::NEW_INSTANCE) {
         uint32_t type_idx = mir->dalvikInsn.vB;
         if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
-          // Change NEW_INSTANCE into CONST_4 of 0
-          mir->dalvikInsn.opcode = Instruction::CONST_4;
-          mir->dalvikInsn.vB = 0;
+          LOG(FATAL) << "Quick cannot compile String allocations";
         }
       } else if ((opcode == Instruction::INVOKE_DIRECT) ||
                  (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
@@ -1689,52 +1687,13 @@
         DexFileMethodInliner* inliner =
             cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
         if (inliner->IsStringInitMethodIndex(method_idx)) {
-          bool is_range = (opcode == Instruction::INVOKE_DIRECT_RANGE);
-          uint32_t orig_this_reg = is_range ? mir->dalvikInsn.vC : mir->dalvikInsn.arg[0];
-          // Remove this pointer from string init and change to static call.
-          mir->dalvikInsn.vA--;
-          if (!is_range) {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC;
-            for (uint32_t i = 0; i < mir->dalvikInsn.vA; i++) {
-              mir->dalvikInsn.arg[i] = mir->dalvikInsn.arg[i + 1];
-            }
-          } else {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC_RANGE;
-            mir->dalvikInsn.vC++;
-          }
-          // Insert a move-result instruction to the original this pointer reg.
-          MIR* move_result_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-          move_result_mir->dalvikInsn.opcode = Instruction::MOVE_RESULT_OBJECT;
-          move_result_mir->dalvikInsn.vA = orig_this_reg;
-          move_result_mir->offset = mir->offset;
-          move_result_mir->m_unit_index = mir->m_unit_index;
-          bb->InsertMIRAfter(mir, move_result_mir);
-          // Add additional moves if this pointer was copied to other registers.
-          const VerifiedMethod* verified_method =
-              cu_->compiler_driver->GetVerifiedMethod(cu_->dex_file, cu_->method_idx);
-          DCHECK(verified_method != nullptr);
-          const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-              verified_method->GetStringInitPcRegMap();
-          auto map_it = string_init_map.find(mir->offset);
-          if (map_it != string_init_map.end()) {
-            const std::set<uint32_t>& reg_set = map_it->second;
-            for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-              MIR* move_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-              move_mir->dalvikInsn.opcode = Instruction::MOVE_OBJECT;
-              move_mir->dalvikInsn.vA = *set_it;
-              move_mir->dalvikInsn.vB = orig_this_reg;
-              move_mir->offset = mir->offset;
-              move_mir->m_unit_index = mir->m_unit_index;
-              bb->InsertMIRAfter(move_result_mir, move_mir);
-            }
-          }
+          LOG(FATAL) << "Quick cannot compile String allocations";
         }
       }
     }
   }
 }
 
-
 bool MIRGraph::EliminateSuspendChecksGate() {
   if (kLeafOptimization ||           // Incompatible (could create loops without suspend checks).
       (cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 027290f..49768de 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -509,7 +509,8 @@
 }
 
 bool QuickCompiler::CanCompileInstruction(const MIR* mir,
-                                          const DexFile& dex_file) const {
+                                          const DexFile& dex_file,
+                                          CompilationUnit* cu) const {
   switch (mir->dalvikInsn.opcode) {
     // Quick compiler won't support new instruction semantics to invoke-super into an interface
     // method
@@ -522,6 +523,13 @@
       // False if we are an interface i.e. !(java_access_flags & kAccInterface)
       return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
     }
+    case Instruction::NEW_INSTANCE: {
+      uint32_t type_idx = mir->dalvikInsn.vB;
+      if (cu->compiler_driver->IsStringTypeIndex(type_idx, cu->dex_file)) {
+        return false;
+      }
+      return true;
+    }
     default:
       return true;
   }
@@ -567,7 +575,7 @@
               << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
         }
         return false;
-      } else if (!CanCompileInstruction(mir, dex_file)) {
+      } else if (!CanCompileInstruction(mir, dex_file, cu)) {
         VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
         return false;
       }
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index 55f45f1..f32cf86 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -75,7 +75,7 @@
   explicit QuickCompiler(CompilerDriver* driver);
 
  private:
-  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file, CompilationUnit* cu) const;
 
   std::unique_ptr<PassManager> pre_opt_pass_manager_;
   std::unique_ptr<PassManager> post_opt_pass_manager_;
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 0355f11..9ae2164 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -37,20 +37,16 @@
 
 namespace art {
 
-VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types,
-                               bool has_runtime_throw,
-                               const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map)
+VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw)
     : encountered_error_types_(encountered_error_types),
-      has_runtime_throw_(has_runtime_throw),
-      string_init_pc_reg_map_(string_init_pc_reg_map) {
+      has_runtime_throw_(has_runtime_throw) {
 }
 
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
   std::unique_ptr<VerifiedMethod> verified_method(
       new VerifiedMethod(method_verifier->GetEncounteredFailureTypes(),
-                         method_verifier->HasInstructionThatWillThrow(),
-                         method_verifier->GetStringInitPcRegMap()));
+                         method_verifier->HasInstructionThatWillThrow()));
 
   if (compile) {
     /* Generate a register map. */
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 74fcb07..12d0219 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -83,14 +83,8 @@
     return has_runtime_throw_;
   }
 
-  const SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() const {
-    return string_init_pc_reg_map_;
-  }
-
  private:
-  VerifiedMethod(uint32_t encountered_error_types,
-                 bool has_runtime_throw,
-                 const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map);
+  VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw);
 
   /*
    * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
@@ -129,10 +123,6 @@
 
   const uint32_t encountered_error_types_;
   const bool has_runtime_throw_;
-
-  // Copy of mapping generated by verifier of dex PCs of string init invocations
-  // to the set of other registers that the receiver has been copied into.
-  const SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index bc5c6ca..510613e 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -190,7 +190,8 @@
 
 void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
   if (swap_space_.get() != nullptr) {
-    os << " swap=" << PrettySize(swap_space_->GetSize());
+    const size_t swap_size = swap_space_->GetSize();
+    os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)";
   }
   if (extended) {
     Thread* self = Thread::Current();
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 0d65bc7..3cb63e7 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -186,13 +186,7 @@
       } else {
         // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
-        std::string temp;
-        const DexFile::TypeId* type_id =
-           dex_file->FindTypeId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
-        if (type_id != nullptr) {
-          // medium path, needs check of static storage base being initialized
-          storage_idx = dex_file->GetIndexForTypeId(*type_id);
-        }
+        storage_idx = resolved_member->GetDeclaringClass()->FindTypeIndexInOtherDexFile(*dex_file);
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f078bf6..db8c3ab 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -378,7 +378,6 @@
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
-  DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
 
   compiler_->Init();
@@ -2493,6 +2492,7 @@
                    parallel_thread_pool_.get(),
                    parallel_thread_count_,
                    timings);
+    Runtime::Current()->ReclaimArenaPoolMemory();
   }
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
@@ -2732,16 +2732,18 @@
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
-  const ArenaPool* arena_pool = runtime->GetArenaPool();
-  gc::Heap* const heap = runtime->GetHeap();
-  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
-  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+  const ArenaPool* const arena_pool = runtime->GetArenaPool();
+  const gc::Heap* const heap = runtime->GetHeap();
+  const size_t arena_alloc = arena_pool->GetBytesAllocated();
+  const size_t java_alloc = heap->GetBytesAllocated();
+  oss << "arena alloc=" << PrettySize(arena_alloc) << " (" << arena_alloc << "B)";
+  oss << " java alloc=" << PrettySize(java_alloc) << " (" << java_alloc << "B)";
 #if defined(__BIONIC__) || defined(__GLIBC__)
-  struct mallinfo info = mallinfo();
+  const struct mallinfo info = mallinfo();
   const size_t allocated_space = static_cast<size_t>(info.uordblks);
   const size_t free_space = static_cast<size_t>(info.fordblks);
-  oss << " native alloc=" << PrettySize(allocated_space) << " free="
-      << PrettySize(free_space);
+  oss << " native alloc=" << PrettySize(allocated_space) << " (" << allocated_space << "B)"
+      << " free=" << PrettySize(free_space) << " (" << free_space << "B)";
 #endif
   compiled_method_storage_.DumpMemoryUsage(oss, extended);
   return oss.str();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5e35cbb..d8f23f7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -138,6 +138,7 @@
       REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_);
 
   VerificationResults* GetVerificationResults() const {
+    DCHECK(Runtime::Current()->IsAotCompiler());
     return verification_results_;
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 73574ba..d50528e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -124,7 +124,10 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
-    ComputeLazyFieldsForImageClasses();  // Add useful information
+    if (!compile_app_image_) {
+      // Avoid for app image since this may increase RAM and image size.
+      ComputeLazyFieldsForImageClasses();  // Add useful information
+    }
   }
   heap->CollectGarbage(false);  // Remove garbage.
 
@@ -735,20 +738,20 @@
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
   bool early_exit = false;
   std::unordered_set<mirror::Class*> visited;
-  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+  return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+bool ImageWriter::PruneAppImageClassInternal(
     mirror::Class* klass,
     bool* early_exit,
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
-  if (klass == nullptr) {
+  if (klass == nullptr || IsInBootImage(klass)) {
     return false;
   }
   auto found = prune_class_memo_.find(klass);
@@ -762,7 +765,11 @@
     return false;
   }
   visited->emplace(klass);
-  bool result = IsBootClassLoaderNonImageClass(klass);
+  bool result = IsBootClassLoaderClass(klass);
+  std::string temp;
+  // Prune if not an image class; this handles any broken sets of image classes, such as having a
+  // class in the set but not its superclass.
+  result = result || !compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
   bool my_early_exit = false;  // Only for ourselves, ignore caller.
   // Remove classes that failed to verify since we don't want to have java.lang.VerifyError in the
   // app image.
@@ -775,17 +782,15 @@
     // Check interfaces since these won't be visited through VisitReferences.
     mirror::IfTable* if_table = klass->GetIfTable();
     for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-      result = result || ContainsBootClassLoaderNonImageClassInternal(
-          if_table->GetInterface(i),
-          &my_early_exit,
-          visited);
+      result = result || PruneAppImageClassInternal(if_table->GetInterface(i),
+                                                    &my_early_exit,
+                                                    visited);
     }
   }
   if (klass->IsObjectArrayClass()) {
-    result = result || ContainsBootClassLoaderNonImageClassInternal(
-        klass->GetComponentType(),
-        &my_early_exit,
-        visited);
+    result = result || PruneAppImageClassInternal(klass->GetComponentType(),
+                                                  &my_early_exit,
+                                                  visited);
   }
   // Check static fields and their classes.
   size_t num_static_fields = klass->NumReferenceStaticFields();
@@ -798,27 +803,22 @@
       mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
       if (ref != nullptr) {
         if (ref->IsClass()) {
-          result = result ||
-                   ContainsBootClassLoaderNonImageClassInternal(
-                       ref->AsClass(),
-                       &my_early_exit,
-                       visited);
+          result = result || PruneAppImageClassInternal(ref->AsClass(),
+                                                        &my_early_exit,
+                                                        visited);
+        } else {
+          result = result || PruneAppImageClassInternal(ref->GetClass(),
+                                                        &my_early_exit,
+                                                        visited);
         }
-        result = result ||
-                 ContainsBootClassLoaderNonImageClassInternal(
-                     ref->GetClass(),
-                     &my_early_exit,
-                     visited);
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
                                   sizeof(mirror::HeapReference<mirror::Object>));
     }
   }
-  result = result ||
-           ContainsBootClassLoaderNonImageClassInternal(
-               klass->GetSuperClass(),
-               &my_early_exit,
-               visited);
+  result = result || PruneAppImageClassInternal(klass->GetSuperClass(),
+                                                &my_early_exit,
+                                                visited);
   // Erase the element we stored earlier since we are exiting the function.
   auto it = visited->find(klass);
   DCHECK(it != visited->end());
@@ -837,15 +837,21 @@
   if (klass == nullptr) {
     return false;
   }
+  if (compile_app_image_ && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    // Already in boot image, return true.
+    return true;
+  }
+  std::string temp;
+  if (!compiler_driver_.IsImageClass(klass->GetDescriptor(&temp))) {
+    return false;
+  }
   if (compile_app_image_) {
     // For app images, we need to prune boot loader classes that are not in the boot image since
     // these may have already been loaded when the app image is loaded.
     // Keep classes in the boot image space since we don't want to re-resolve these.
-    return Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass) ||
-        !ContainsBootClassLoaderNonImageClass(klass);
+    return !PruneAppImageClass(klass);
   }
-  std::string temp;
-  return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
+  return true;
 }
 
 class NonImageClassesVisitor : public ClassVisitor {
@@ -873,6 +879,7 @@
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
+  VLOG(compiler) << "Pruning " << visitor.classes_to_prune_.size() << " classes";
   for (mirror::Class* klass : visitor.classes_to_prune_) {
     std::string temp;
     const char* name = klass->GetDescriptor(&temp);
@@ -891,10 +898,10 @@
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
   ReaderMutexLock mu2(self, *class_linker->DexLock());
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-    if (dex_cache == nullptr) {
+    if (self->IsJWeakCleared(data.weak_root)) {
       continue;
     }
+    mirror::DexCache* dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != nullptr && !KeepClass(klass)) {
@@ -907,10 +914,10 @@
           mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
       DCHECK(method != nullptr) << "Expected resolution method instead of null method";
       mirror::Class* declaring_class = method->GetDeclaringClass();
-      // Miranda methods may be held live by a class which was not an image class but have a
+      // Copied methods may be held live by a class which was not an image class but have a
       // declaring class which is an image class. Set it to the resolution method to be safe and
       // prevent dangling pointers.
-      if (method->IsMiranda() || !KeepClass(declaring_class)) {
+      if (method->MightBeCopied() || !KeepClass(declaring_class)) {
         mirror::DexCache::SetElementPtrSize(resolved_methods,
                                             i,
                                             resolution_method,
@@ -1820,12 +1827,16 @@
 }
 
 template <typename T>
-T* ImageWriter::NativeLocationInImage(T* obj, const char* oat_filename) {
+T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
-    ImageInfo& image_info = GetImageInfo(oat_filename);
-    return reinterpret_cast<T*>(image_info.image_begin_ + NativeOffsetInImage(obj));
+    auto it = native_object_relocations_.find(obj);
+    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+        << Runtime::Current()->GetHeap()->DumpSpaces();
+    const NativeObjectRelocation& relocation = it->second;
+    ImageInfo& image_info = GetImageInfo(relocation.oat_filename);
+    return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
   }
 }
 
@@ -1842,33 +1853,19 @@
 
 class NativeLocationVisitor {
  public:
-  explicit NativeLocationVisitor(ImageWriter* image_writer, const char* oat_filename)
-      : image_writer_(image_writer), oat_filename_(oat_filename) {}
+  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   template <typename T>
   T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return image_writer_->NativeLocationInImage(ptr, oat_filename_);
-  }
-
-  ArtMethod* operator()(ArtMethod* method) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = method->IsRuntimeMethod() ? image_writer_->GetDefaultOatFilename() :
-        image_writer_->GetOatFilenameForDexCache(method->GetDexCache());
-    return image_writer_->NativeLocationInImage(method, oat_filename);
-  }
-
-  ArtField* operator()(ArtField* field) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = image_writer_->GetOatFilenameForDexCache(field->GetDexCache());
-    return image_writer_->NativeLocationInImage(field, oat_filename);
+    return image_writer_->NativeLocationInImage(ptr);
   }
 
  private:
   ImageWriter* const image_writer_;
-  const char* oat_filename_;
 };
 
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  const char* oat_filename = GetOatFilename(orig);
-  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 
@@ -1952,11 +1949,10 @@
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-  const char* oat_filename = GetOatFilenameForDexCache(orig_dex_cache);
   GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
-                                               NativeLocationInImage(orig_strings, oat_filename),
+                                               NativeLocationInImage(orig_strings),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
                                  ImageAddressVisitor(this));
@@ -1964,7 +1960,7 @@
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
-                                               NativeLocationInImage(orig_types, oat_filename),
+                                               NativeLocationInImage(orig_types),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
                                        ImageAddressVisitor(this));
@@ -1972,32 +1968,25 @@
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
-                                               NativeLocationInImage(orig_methods, oat_filename),
+                                               NativeLocationInImage(orig_methods),
                                                /*pointer size*/8u);
     ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
-      const char* method_oat_filename;
-      if (orig == nullptr || orig->IsRuntimeMethod()) {
-        method_oat_filename = default_oat_filename_;
-      } else {
-        method_oat_filename = GetOatFilenameForDexCache(orig->GetDexCache());
-      }
-      ArtMethod* copy = NativeLocationInImage(orig, method_oat_filename);
+      // NativeLocationInImage also handles runtime methods since these have relocation info.
+      ArtMethod* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
-                                               NativeLocationInImage(orig_fields, oat_filename),
+                                               NativeLocationInImage(orig_fields),
                                                /*pointer size*/8u);
     ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      const char* field_oat_filename =
-          orig == nullptr ? default_oat_filename_ : GetOatFilenameForDexCache(orig->GetDexCache());
-      ArtField* copy = NativeLocationInImage(orig, field_oat_filename);
+      ArtField* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
@@ -2089,20 +2078,10 @@
 
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
-  const char* oat_filename;
-  if (orig->IsRuntimeMethod() || compile_app_image_) {
-    oat_filename = default_oat_filename_;
-  } else {
-    auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
-    DCHECK(it != dex_file_oat_filename_map_.end()) << orig->GetDexFile()->GetLocation();
-    oat_filename = it->second;
-  }
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
-  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods, oat_filename),
-                                   target_ptr_size_);
+  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
   GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_);
-  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types, oat_filename),
-                                 target_ptr_size_);
+  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types), target_ptr_size_);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -2324,6 +2303,8 @@
     image_info_map_.emplace(oat_filename, ImageInfo());
   }
   std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+  CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty())
+      << "Compiling a boot image should occur iff there are no boot image spaces loaded";
 }
 
 ImageWriter::ImageInfo::ImageInfo()
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 9371d9f..ee204c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -410,16 +410,18 @@
   // Return true if klass is loaded by the boot class loader but not in the boot image.
   bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Return true if klass depends on a boot class loader non image class live. We want to prune
-  // these classes since we do not want any boot class loader classes in the image. This means that
+  // Return true if klass depends on a boot class loader non image class. We want to prune these
+  // classes since we do not want any boot class loader classes in the image. This means that
   // we also cannot have any classes which refer to these boot class loader non image classes.
-  bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
+  // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
+  // driver.
+  bool PruneAppImageClass(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
-  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
-                                                    bool* early_exit,
-                                                    std::unordered_set<mirror::Class*>* visited)
+  bool PruneAppImageClassInternal(mirror::Class* klass,
+                                  bool* early_exit,
+                                  std::unordered_set<mirror::Class*>* visited)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
@@ -428,7 +430,7 @@
 
   // Location of where the object will be when the image is loaded at runtime.
   template <typename T>
-  T* NativeLocationInImage(T* obj, const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_);
+  T* NativeLocationInImage(T* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Location of where the temporary copy of the object currently is.
   template <typename T>
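
[Editor's sketch] Behind the rename, PruneAppImageClassInternal is a memoized depth-first walk over class dependencies that has to tolerate cycles: each class's answer is cached, and an early-exit flag marks answers computed while a cycle was still open so that provisional negatives are not memoized. A stripped-down sketch of the pattern over a generic dependency graph (hypothetical Node type, not mirror::Class):

    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Node {
      bool prune_here = false;  // Stand-in for the local checks (boot class, erroneous, ...).
      std::vector<Node*> deps;  // Superclass, interfaces, component type, static field classes.
    };

    bool ShouldPrune(Node* node,
                     bool* early_exit,
                     std::unordered_set<Node*>* visited,
                     std::unordered_map<Node*, bool>* memo) {
      if (node == nullptr) return false;
      auto found = memo->find(node);
      if (found != memo->end()) return found->second;  // Cached answer.
      if (visited->count(node) != 0) {                 // Cycle: this answer is provisional.
        *early_exit = true;
        return false;
      }
      visited->insert(node);
      bool result = node->prune_here;
      bool my_early_exit = false;  // Only for ourselves; ignore the caller's flag.
      for (Node* dep : node->deps) {
        result = result || ShouldPrune(dep, &my_early_exit, visited, memo);
      }
      visited->erase(node);
      // Memoize only answers that did not depend on a still-open cycle;
      // a provisional "false" seen mid-cycle must not be cached.
      if (result || !my_early_exit) {
        memo->emplace(node, result);
      }
      *early_exit |= my_early_exit;
      return result;
    }
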
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3fe7861..909d682 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -23,10 +23,7 @@
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "base/unix_file/fd_file.h"
-#include "compiler_callbacks.h"
 #include "debug/elf_debug_writer.h"
-#include "dex/pass_manager.h"
-#include "dex/quick_compiler_callbacks.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "jit/debugger_interface.h"
@@ -36,7 +33,6 @@
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "thread_list.h"
-#include "verifier/method_verifier-inl.h"
 
 namespace art {
 namespace jit {
@@ -45,11 +41,10 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
+extern "C" void* jit_load(bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
-  *callbacks = jit_compiler->GetCompilerCallbacks();
   *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
@@ -151,14 +146,10 @@
     instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
   }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              CompilerCallbacks::CallbackMode::kCompileApp));
   compiler_driver_.reset(new CompilerDriver(
       compiler_options_.get(),
-      verification_results_.get(),
+      /* verification_results */ nullptr,
       method_inliner_map_.get(),
       Compiler::kOptimizing,
       instruction_set,
@@ -251,9 +242,5 @@
   return success;
 }
 
-CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
-  return callbacks_.get();
-}
-
 }  // namespace jit
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 894d29e..d3b404a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -415,7 +415,9 @@
     size_t visited_virtuals = 0;
     // TODO We should also check copied methods in this test.
     for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
-      EXPECT_FALSE(m.IsMiranda());
+      if (!klass->IsInterface()) {
+        EXPECT_FALSE(m.MightBeCopied());
+      }
       CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
       ++method_index;
       ++visited_virtuals;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 3eda863..c500ea4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2107,7 +2107,6 @@
   LocationSummary* locations = instruction->GetLocations();
   Register res = locations->Out().AsRegister<Register>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
-  bool gt_bias = instruction->IsGtBias();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   //  0 if: left == right
@@ -2141,6 +2140,7 @@
     }
 
     case Primitive::kPrimFloat: {
+      bool gt_bias = instruction->IsGtBias();
       FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
       FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
       MipsLabel done;
@@ -2180,6 +2180,7 @@
       break;
     }
     case Primitive::kPrimDouble: {
+      bool gt_bias = instruction->IsGtBias();
       FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
       FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
       MipsLabel done;
@@ -3953,28 +3954,19 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
-    return;
-  }
-
+void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  Register temp = temp_location.AsRegister<Register>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
 
   // temp = object->GetClass();
-  if (receiver.IsStackSlot()) {
-    __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
-    __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
-  } else {
-    DCHECK(receiver.IsRegister());
-    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
-  }
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  DCHECK(receiver.IsRegister());
+  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3982,6 +3974,14 @@
   // T9();
   __ Jalr(T9);
   __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 12964b0..dd0641c 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -353,10 +353,7 @@
       MethodReference target_method) OVERRIDE;
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 119084e..e3a44f1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1727,7 +1727,6 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister res = locations->Out().AsRegister<GpuRegister>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
-  bool gt_bias = instruction->IsGtBias();
 
   //  0 if: left == right
   //  1 if: left  > right
@@ -1769,7 +1768,7 @@
       __ CmpEqS(FTMP, lhs, rhs);
       __ LoadConst32(res, 0);
       __ Bc1nez(FTMP, &done);
-      if (gt_bias) {
+      if (instruction->IsGtBias()) {
         __ CmpLtS(FTMP, lhs, rhs);
         __ LoadConst32(res, -1);
         __ Bc1nez(FTMP, &done);
@@ -1791,7 +1790,7 @@
       __ CmpEqD(FTMP, lhs, rhs);
       __ LoadConst32(res, 0);
       __ Bc1nez(FTMP, &done);
-      if (gt_bias) {
+      if (instruction->IsGtBias()) {
         __ CmpLtD(FTMP, lhs, rhs);
         __ LoadConst32(res, -1);
         __ Bc1nez(FTMP, &done);
@@ -4258,4 +4257,3 @@
 
 }  // namespace mips64
 }  // namespace art
-
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 1161253..eb7315a 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -349,7 +349,7 @@
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
+    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64";
   }
 
  private:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 07edd97..f032f51 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,7 +26,6 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
-#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -1276,7 +1275,7 @@
     }
     // Must be equal high, so compare the lows.
     codegen_->Compare32BitValue(left_low, val_low);
-  } else {
+  } else if (right.IsRegisterPair()) {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
 
@@ -1291,6 +1290,19 @@
     }
     // Must be equal high, so compare the lows.
     __ cmpl(left_low, right_low);
+  } else {
+    DCHECK(right.IsDoubleStackSlot());
+    __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+    if (if_cond == kCondNE) {
+      __ j(X86Condition(true_high_cond), true_label);
+    } else if (if_cond == kCondEQ) {
+      __ j(X86Condition(false_high_cond), false_label);
+    } else {
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
+    }
+    // Must be equal high, so compare the lows.
+    __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
   }
   // The last comparison might be unsigned.
   __ j(final_condition, true_label);
@@ -1505,30 +1517,131 @@
                                /* false_target */ nullptr);
 }
 
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // A FP condition doesn't generate the single CC that we need.
+  // In 32 bit mode, a long condition doesn't generate a single CC either.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition()) {
+    Primitive::Type compare_type = condition->InputAt(0)->GetType();
+    if (compare_type == Primitive::kPrimLong ||
+        Primitive::IsFloatingPointType(compare_type)) {
+      return false;
+    }
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  Primitive::Type select_type = select->GetType();
-  HInstruction* cond = select->GetCondition();
-
-  if (Primitive::IsFloatingPointType(select_type)) {
+  if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        // Cmov can't handle a constant value.
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
   }
-  locations->SetInAt(1, Location::Any());
-  if (IsBooleanValueOrMaterializedCondition(cond)) {
-    locations->SetInAt(2, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
 
+void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+  Register lhs_reg = lhs.AsRegister<Register>();
+  if (rhs.IsConstant()) {
+    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+    codegen_->Compare32BitValue(lhs_reg, value);
+  } else if (rhs.IsStackSlot()) {
+    __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+  } else {
+    __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+  }
+}
+
 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(
-      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction. Condition codes are right.
+          cond = X86Condition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          Register cond_reg = locations->InAt(2).AsRegister<Register>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        // We can't handle FP or long here.
+        DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+        DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+        LocationSummary* cond_locations = condition->GetLocations();
+        GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        cond = X86Condition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      Register cond_reg = locations->InAt(2).AsRegister<Register>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    Location false_loc = locations->InAt(0);
+    Location true_loc = locations->InAt(1);
+    if (select->GetType() == Primitive::kPrimLong) {
+      // 64 bit conditional move.
+      Register false_high = false_loc.AsRegisterPairHigh<Register>();
+      Register false_low = false_loc.AsRegisterPairLow<Register>();
+      if (true_loc.IsRegisterPair()) {
+        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
+        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
+      } else {
+        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
+        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
+      }
+    } else {
+      // 32 bit conditional move.
+      Register false_reg = false_loc.AsRegister<Register>();
+      if (true_loc.IsRegister()) {
+        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
+      } else {
+        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
+      }
+    }
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(
+        select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1593,7 +1706,7 @@
   switch (cond->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+      locations->SetInAt(1, Location::Any());
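+      // Any() allows the second input to be a constant, a register pair, or a stack location.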
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
@@ -1642,15 +1755,7 @@
 
       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-      }
+      GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -4128,15 +4233,7 @@
 
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimInt: {
-      Register left_reg = left.AsRegister<Register>();
-      if (right.IsConstant()) {
-        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
-        codegen_->Compare32BitValue(left_reg, value);
-      } else if (right.IsStackSlot()) {
-        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
-      } else {
-        __ cmpl(left_reg, right.AsRegister<Register>());
-      }
+      GenerateIntCompare(left, right);
       break;
     }
     case Primitive::kPrimLong: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2fb6d60..63e9b2f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -297,6 +297,7 @@
                                    HBasicBlock* default_block);
 
   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+  void GenerateIntCompare(Location lhs, Location rhs);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a53a6be..f3c40b1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6511,8 +6511,8 @@
   if (value == 0) {
     // Clears upper bits too.
     __ xorl(dest, dest);
-  } else if (value > 0 && IsInt<32>(value)) {
-    // We can use a 32 bit move, as it will zero-extend and is one byte shorter.
+  } else if (IsUint<32>(value)) {
+    // We can use a 32 bit move, as it will zero-extend and is shorter.
     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   } else {
     __ movq(dest, Immediate(value));
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 57452cc..7ddabde 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,8 +18,28 @@
 
 namespace art {
 
-// This visitor tries to simplify operations that yield a constant. For example
-// `input * 0` is replaced by a null constant.
+// This visitor tries to simplify instructions that can be evaluated
+// as constants.
+class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+ public:
+  explicit HConstantFoldingVisitor(HGraph* graph)
+      : HGraphDelegateVisitor(graph) {}
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+  void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE;
+  void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE;
+
+  void VisitTypeConversion(HTypeConversion* inst) OVERRIDE;
+  void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
+};
+
+// This visitor tries to simplify operations with an absorbing input,
+// yielding a constant. For example `input * 0` is replaced by a
+// zero constant.
 class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
  public:
   explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
@@ -44,59 +64,69 @@
   void VisitXor(HXor* instruction) OVERRIDE;
 };
 
+
 void HConstantFolding::Run() {
-  InstructionWithAbsorbingInputSimplifier simplifier(graph_);
+  HConstantFoldingVisitor visitor(graph_);
   // Process basic blocks in reverse post-order in the dominator tree,
   // so that an instruction turned into a constant, used as input of
   // another instruction, may possibly be used to turn that second
   // instruction into a constant as well.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Traverse this block's instructions in (forward) order and
-    // replace the ones that can be statically evaluated by a
-    // compile-time counterpart.
-    for (HInstructionIterator inst_it(block->GetInstructions());
-         !inst_it.Done(); inst_it.Advance()) {
-      HInstruction* inst = inst_it.Current();
-      if (inst->IsBinaryOperation()) {
-        // Constant folding: replace `op(a, b)' with a constant at
-        // compile time if `a' and `b' are both constants.
-        HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        } else {
-          inst->Accept(&simplifier);
-        }
-      } else if (inst->IsUnaryOperation()) {
-        // Constant folding: replace `op(a)' with a constant at compile
-        // time if `a' is a constant.
-        HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsTypeConversion()) {
-        // Constant folding: replace `TypeConversion(a)' with a constant at
-        // compile time if `a' is a constant.
-        HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsDivZeroCheck()) {
-        // We can safely remove the check if the input is a non-null constant.
-        HDivZeroCheck* check = inst->AsDivZeroCheck();
-        HInstruction* check_input = check->InputAt(0);
-        if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
-          check->ReplaceWith(check_input);
-          check->GetBlock()->RemoveInstruction(check);
-        }
-      }
-    }
+  visitor.VisitReversePostOrder();
+}
+
+
+void HConstantFoldingVisitor::VisitBasicBlock(HBasicBlock* block) {
+  // Traverse this block's instructions (phis don't need to be
+  // processed) in (forward) order and replace the ones that can be
+  // statically evaluated by a compile-time counterpart.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    it.Current()->Accept(this);
   }
 }
 
+void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
+  // Constant folding: replace `op(a)' with a constant at compile
+  // time if `a' is a constant.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
+  // Constant folding: replace `op(a, b)' with a constant at
+  // compile time if `a' and `b' are both constants.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  } else {
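+    // Not statically evaluable; try simplifying operations with an absorbing input (e.g. `x * 0`).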
+    InstructionWithAbsorbingInputSimplifier simplifier(GetGraph());
+    inst->Accept(&simplifier);
+  }
+}
+
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+  // Constant folding: replace `TypeConversion(a)' with a constant at
+  // compile time if `a' is a constant.
+  HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
+  // We can safely remove the check if the input is a non-zero constant.
+  HInstruction* check_input = inst->InputAt(0);
+  if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
+    inst->ReplaceWith(check_input);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+
 void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* left = instruction->GetLeft();
@@ -178,7 +208,7 @@
         ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
          (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
       // Replace code looking like
-      //    CMP{G,L} dst, src, NaN
+      //    CMP{G,L}-{FLOAT,DOUBLE} dst, src, NaN
       // with
       //    CONSTANT +1 (gt bias)
       // or
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 2698b2d..e10b1d6 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -26,13 +26,20 @@
  * Optimization pass performing a simple constant-expression
  * evaluation on the SSA form.
  *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
+ *
  * This class is named art::HConstantFolding to avoid name
  * clashes with the art::ConstantPropagation class defined in
  * compiler/dex/post_opt_passes.h.
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
       : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index e6e9177..4a49c83 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -593,8 +593,9 @@
       HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
-            "Loop header %d has multiple incoming (non back edge) blocks.",
-            id));
+            "Loop header %d has multiple incoming (non back edge) blocks: %d.",
+            id,
+            predecessor->GetBlockId()));
       }
     }
   }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4cf0eb1..c0263e4 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -384,6 +384,13 @@
         << array_set->GetValueCanBeNull() << std::noboolalpha;
   }
 
+  void VisitCompare(HCompare* compare) OVERRIDE {
+    ComparisonBias bias = compare->GetBias();
+    StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias
+                                     ? "gt"
+                                     : (bias == ComparisonBias::kLtBias ? "lt" : "none"));
+  }
+
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
     StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
     StartAttributeStream("method_name") << PrettyMethod(
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 68e96fb..02a1acc 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -190,28 +190,34 @@
   }
 }
 
-static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+static uint32_t FindClassIndexIn(mirror::Class* cls,
+                                 const DexFile& dex_file,
+                                 Handle<mirror::DexCache> dex_cache)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t index = DexFile::kDexNoIndex;
   if (cls->GetDexCache() == nullptr) {
-    DCHECK(cls->IsArrayClass());
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
+    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
   } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
     // TODO: deal with proxy classes.
-    return DexFile::kDexNoIndex;
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    index = cls->GetDexTypeIndex();
+  } else {
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+  }
+
+  if (index != DexFile::kDexNoIndex) {
     // Update the dex cache to ensure the class is in. The generated code will
     // consider it is. We make it safe by updating the dex cache, as other
     // dex files might also load the class, and there is no guarantee the dex
     // cache of the dex file of the class will be updated.
-    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
-      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    if (dex_cache->GetResolvedType(index) == nullptr) {
+      dex_cache->SetResolvedType(index, cls);
     }
-    return cls->GetDexTypeIndex();
-  } else {
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
   }
+
+  return index;
 }
 
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -258,8 +264,9 @@
   }
 
   if (actual_method != nullptr) {
-    return TryInline(invoke_instruction, actual_method);
+    return TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
   }
+
   DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
   // Check if we can use an inline cache.
@@ -302,7 +309,7 @@
                                                    uint32_t dex_pc) const {
   ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
   DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
-  return new (graph_->GetArena()) HInstanceFieldGet(
+  HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
       receiver,
       Primitive::kPrimNot,
       field->GetOffset(),
@@ -312,6 +319,9 @@
       *field->GetDexFile(),
       handles_->NewHandle(field->GetDexCache()),
       dex_pc);
+  // The class of a field is effectively final, and does not have any memory dependencies.
+  result->SetSideEffects(SideEffects::None());
+  return result;
 }
 
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
@@ -321,7 +331,8 @@
       << invoke_instruction->DebugName();
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  uint32_t class_index = FindClassIndexIn(
+      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
   if (class_index == DexFile::kDexNoIndex) {
     VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because its class is not"
@@ -344,37 +355,20 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
     return false;
   }
 
   // We successfully inlined, now add a guard.
-  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
-      class_linker, receiver, invoke_instruction->GetDexPc());
-
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
-  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
-                                                               class_index,
-                                                               caller_dex_file,
-                                                               is_referrer,
-                                                               invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true);
-
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-      compare, invoke_instruction->GetDexPc());
-  // TODO: Extend reference type propagation to understand the guard.
-  if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
-  } else {
-    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
-  }
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
-  bb_cursor->InsertInstructionAfter(compare, load_class);
-  bb_cursor->InsertInstructionAfter(deoptimize, compare);
-  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  AddTypeGuard(receiver,
+               cursor,
+               bb_cursor,
+               class_index,
+               is_referrer,
+               invoke_instruction,
+               /* with_deoptimization */ true);
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
@@ -385,11 +379,219 @@
   return true;
 }
 
+HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
+                                     HInstruction* cursor,
+                                     HBasicBlock* bb_cursor,
+                                     uint32_t class_index,
+                                     bool is_referrer,
+                                     HInstruction* invoke_instruction,
+                                     bool with_deoptimization) {
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
+  if (with_deoptimization) {
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
+  return compare;
+}
+
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
+
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+    return true;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  bool all_targets_inlined = true;
+  bool one_target_inlined = false;
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+
+    HInstruction* receiver = invoke_instruction->InputAt(0);
+    HInstruction* cursor = invoke_instruction->GetPrevious();
+    HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+    uint32_t class_index = FindClassIndexIn(
+        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    HInstruction* return_replacement = nullptr;
+    if (class_index == DexFile::kDexNoIndex ||
+        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+      all_targets_inlined = false;
+    } else {
+      one_target_inlined = true;
+      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+
+      // If we have inlined all targets before, and this receiver is the last seen,
+      // we deoptimize instead of keeping the original invoke instruction.
+      bool deoptimize = all_targets_inlined &&
+          (i != InlineCache::kIndividualCacheSize - 1) &&
+          (ic.GetTypeAt(i + 1) == nullptr);
+      HInstruction* compare = AddTypeGuard(
+          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      if (deoptimize) {
+        if (return_replacement != nullptr) {
+          invoke_instruction->ReplaceWith(return_replacement);
+        }
+        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+        // Because the inline cache data can be populated concurrently, we force the end of the
+        // iteration. Otherwise, we could see a new receiver type.
+        break;
+      } else {
+        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+      }
+    }
+  }
+
+  if (!one_target_inlined) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because none"
+                   << " of its targets could be inlined";
+    return false;
+  }
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  // Run type propagation to get the guards typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  rtp_fixup.Run();
+  return true;
+}
+
+void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                        HInstruction* return_replacement,
+                                                        HInstruction* invoke_instruction) {
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HBasicBlock* cursor_block = compare->GetBlock();
+  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
+  ArenaAllocator* allocator = graph_->GetArena();
+
+  // Split the block after the compare: `cursor_block` will now be the start of the diamond,
+  // and the returned block is the start of the then branch (that could contain multiple blocks).
+  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
+
+  // Split the block containing the invoke before and after the invoke. The returned block
+  // of the split before will contain the invoke and will be the otherwise branch of
+  // the diamond. The returned block of the split after will be the merge block
+  // of the diamond.
+  HBasicBlock* end_then = invoke_instruction->GetBlock();
+  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
+  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
+
+  // If the methods we are inlining return a value, we create a phi in the merge block
+  // that will have the `invoke_instruction` and the `return_replacement` as inputs.
+  if (return_replacement != nullptr) {
+    HPhi* phi = new (allocator) HPhi(
+        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
+    merge->AddPhi(phi);
+    invoke_instruction->ReplaceWith(phi);
+    phi->AddInput(return_replacement);
+    phi->AddInput(invoke_instruction);
+  }
+
+  // Add the control flow instructions.
+  otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
+  end_then->AddInstruction(new (allocator) HGoto(dex_pc));
+  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
+
+  // Add the newly created blocks to the graph.
+  graph_->AddBlock(then);
+  graph_->AddBlock(otherwise);
+  graph_->AddBlock(merge);
+
+  // Set up successor (and implicitly predecessor) relations.
+  cursor_block->AddSuccessor(otherwise);
+  cursor_block->AddSuccessor(then);
+  end_then->AddSuccessor(merge);
+  otherwise->AddSuccessor(merge);
+
+  // Set up dominance information.
+  then->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(then);
+  otherwise->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(otherwise);
+  merge->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(merge);
+
+  // Update the reverse post order.
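+  // `then` goes right after the start of the diamond; `otherwise` and
+  // `merge` go after the end of the then branch.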
+  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
+  MakeRoomFor(&graph_->reverse_post_order_, 1, index);
+  graph_->reverse_post_order_[++index] = then;
+  index = IndexOfElement(graph_->reverse_post_order_, end_then);
+  MakeRoomFor(&graph_->reverse_post_order_, 2, index);
+  graph_->reverse_post_order_[++index] = otherwise;
+  graph_->reverse_post_order_[++index] = merge;
+
+  // Set the loop information of the newly created blocks.
+  HLoopInformation* loop_info = cursor_block->GetLoopInformation();
+  if (loop_info != nullptr) {
+    then->SetLoopInformation(cursor_block->GetLoopInformation());
+    merge->SetLoopInformation(cursor_block->GetLoopInformation());
+    otherwise->SetLoopInformation(cursor_block->GetLoopInformation());
+    for (HLoopInformationOutwardIterator loop_it(*cursor_block);
+         !loop_it.Done();
+         loop_it.Advance()) {
+      loop_it.Current()->Add(then);
+      loop_it.Current()->Add(merge);
+      loop_it.Current()->Add(otherwise);
+    }
+    // In case the original invoke location was a back edge, we need to update
+    // the loop to now have the merge block as a back edge.
+    if (loop_info->IsBackEdge(*original_invoke_block)) {
+      loop_info->RemoveBackEdge(original_invoke_block);
+      loop_info->AddBackEdge(merge);
+    }
+  }
+
+  // Set the try/catch information of the newly created blocks.
+  then->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+}
+
+bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                                    ArtMethod* resolved_method,
+                                                    const InlineCache& ic) {
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
   if (graph_->GetInstructionSet() == kMips64) {
@@ -431,7 +633,7 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInline(invoke_instruction, actual_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ false)) {
     return false;
   }
 
@@ -485,14 +687,29 @@
   return true;
 }
 
-bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  HInstruction* return_replacement = nullptr;
+  if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+    return false;
+  }
+  if (return_replacement != nullptr) {
+    invoke_instruction->ReplaceWith(return_replacement);
+  }
+  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  return true;
+}
+
+bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
+                                 ArtMethod* method,
+                                 HInstruction** return_replacement) {
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
 
   // Check whether we're allowed to inline. The outermost compilation unit is the relevant
  // dex file here (though the transitivity of an inline chain would allow checking the caller).
   if (!compiler_driver_->MayInline(method->GetDexFile(),
                                    outer_compilation_unit_.GetDexFile())) {
-    if (TryPatternSubstitution(invoke_instruction, method, do_rtp)) {
+    if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method);
       MaybeRecordStat(kReplacedInvokeWithSimplePattern);
       return true;
@@ -541,8 +758,9 @@
 
   if (!method->GetDeclaringClass()->IsVerified()) {
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+    if (Runtime::Current()->UseJit() ||
+        !compiler_driver_->IsMethodVerifiedWithoutFailures(
+            method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -559,7 +777,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
+  if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) {
     return false;
   }
 
@@ -586,27 +804,27 @@
 // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
 bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
                                       ArtMethod* resolved_method,
-                                      bool do_rtp) {
+                                      HInstruction** return_replacement) {
   InlineMethod inline_method;
   if (!InlineMethodAnalyser::AnalyseMethodCode(resolved_method, &inline_method)) {
     return false;
   }
 
-  HInstruction* return_replacement = nullptr;
   switch (inline_method.opcode) {
     case kInlineOpNop:
       DCHECK_EQ(invoke_instruction->GetType(), Primitive::kPrimVoid);
+      *return_replacement = nullptr;
       break;
     case kInlineOpReturnArg:
-      return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
-                                                         inline_method.d.return_data.arg);
+      *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
+                                                          inline_method.d.return_data.arg);
       break;
     case kInlineOpNonWideConst:
       if (resolved_method->GetShorty()[0] == 'L') {
         DCHECK_EQ(inline_method.d.data, 0u);
-        return_replacement = graph_->GetNullConstant();
+        *return_replacement = graph_->GetNullConstant();
       } else {
-        return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
+        *return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
       }
       break;
     case kInlineOpIGet: {
@@ -621,7 +839,7 @@
       DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
-      return_replacement = iget;
+      *return_replacement = iget;
       break;
     }
     case kInlineOpIPut: {
@@ -639,7 +857,7 @@
       invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
       if (data.return_arg_plus1 != 0u) {
         size_t return_arg = data.return_arg_plus1 - 1u;
-        return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
+        *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
       }
       break;
     }
@@ -694,19 +912,13 @@
         HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
         invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
       }
+      *return_replacement = nullptr;
       break;
     }
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
   }
-
-  if (return_replacement != nullptr) {
-    invoke_instruction->ReplaceWith(return_replacement);
-  }
-  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-
-  FixUpReturnReferenceType(resolved_method, invoke_instruction, return_replacement, do_rtp);
   return true;
 }
 
@@ -760,10 +972,10 @@
   return iput;
 }
 
-bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
-                                 HInvoke* invoke_instruction,
-                                 bool same_dex_file,
-                                 bool do_rtp) {
+bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                                       ArtMethod* resolved_method,
+                                       bool same_dex_file,
+                                       HInstruction** return_replacement) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -771,16 +983,16 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
   DexCompilationUnit dex_compilation_unit(
-    nullptr,
-    caller_compilation_unit_.GetClassLoader(),
-    class_linker,
-    callee_dex_file,
-    code_item,
-    resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    method_index,
-    resolved_method->GetAccessFlags(),
-    compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index),
-    dex_cache);
+      nullptr,
+      caller_compilation_unit_.GetClassLoader(),
+      class_linker,
+      callee_dex_file,
+      code_item,
+      resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
+      method_index,
+      resolved_method->GetAccessFlags(),
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_ctor_barrier = false;
 
@@ -873,7 +1085,7 @@
   HConstantFolding fold(callee_graph);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
@@ -1016,16 +1228,12 @@
   }
   number_of_inlined_instructions_ += number_of_instructions;
 
-  HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
-  if (return_replacement != nullptr) {
-    DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
-  }
-  FixUpReturnReferenceType(resolved_method, invoke_instruction, return_replacement, do_rtp);
+  *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
   return true;
 }
 
-void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
-                                        HInvoke* invoke_instruction,
+void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
                                         HInstruction* return_replacement,
                                         bool do_rtp) {
   // Check the integrity of reference types and run another type propagation if needed.
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7d343c6..cdb2167 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -61,12 +61,25 @@
   bool TryInline(HInvoke* invoke_instruction);
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
-  // reference type propagation can run after the inlining.
-  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+  // reference type propagation can run after the inlining. If the inlining is successful, this
+  // method will replace and remove the `invoke_instruction`.
+  bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool TryBuildAndInline(HInvoke* invoke_instruction,
+                         ArtMethod* resolved_method,
+                         HInstruction** return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                               ArtMethod* resolved_method,
+                               bool same_dex_file,
+                               HInstruction** return_replacement);
+
   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
-  bool TryPatternSubstitution(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+  bool TryPatternSubstitution(HInvoke* invoke_instruction,
+                              ArtMethod* resolved_method,
+                              HInstruction** return_replacement)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Create a new HInstanceFieldGet.
@@ -88,28 +101,80 @@
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  // Try to inline targets of a polymorphic call.
   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                 ArtMethod* resolved_method,
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool TryBuildAndInline(ArtMethod* resolved_method,
-                         HInvoke* invoke_instruction,
-                         bool same_dex_file,
-                         bool do_rtp = true);
+  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                            ArtMethod* resolved_method,
+                                            const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
 
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
                                            uint32_t dex_pc) const
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void FixUpReturnReferenceType(ArtMethod* resolved_method,
-                                HInvoke* invoke_instruction,
+  void FixUpReturnReferenceType(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
                                 HInstruction* return_replacement,
                                 bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Add a type guard on the given `receiver`. This will add to the graph:
+  // i0 = HFieldGet(receiver, klass)
+  // i1 = HLoadClass(class_index, is_referrer)
+  // i2 = HNotEqual(i0, i1)
+  //
+  // And if `with_deoptimization` is true:
+  // HDeoptimize(i2)
+  //
+  // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
+  HInstruction* AddTypeGuard(HInstruction* receiver,
+                             HInstruction* cursor,
+                             HBasicBlock* bb_cursor,
+                             uint32_t class_index,
+                             bool is_referrer,
+                             HInstruction* invoke_instruction,
+                             bool with_deoptimization)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  /*
+   * Ad-hoc implementation for implementing a diamond pattern in the graph for
+   * polymorphic inlining:
+   * 1) `compare` becomes the input of the new `HIf`.
+   * 2) Everything up until `invoke_instruction` is in the then branch (could
+   *    contain multiple blocks).
+   * 3) `invoke_instruction` is moved to the otherwise block.
+   * 4) If `return_replacement` is not null, the merge block will have
+   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
+   *
+   * Before:
+   *             Block1
+   *             compare
+   *              ...
+   *         invoke_instruction
+   *
+   * After:
+   *            Block1
+   *            compare
+   *              if
+   *          /        \
+   *         /          \
+   *   Then block    Otherwise block
+   *      ...       invoke_instruction
+   *       \              /
+   *        \            /
+   *          Merge block
+   *  phi(return_replacement, invoke_instruction)
+   */
+  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                HInstruction* return_replacement,
+                                                HInstruction* invoke_instruction);
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a48d06f..13d3f75 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -92,6 +92,7 @@
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
   void SimplifyCompare(HInvoke* invoke, bool has_zero_op);
+  void SimplifyIsNaN(HInvoke* invoke);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -1551,6 +1552,16 @@
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
 }
 
+void InstructionSimplifierVisitor::SimplifyIsNaN(HInvoke* invoke) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  // IsNaN(x) is the same as x != x.
+  HInstruction* x = invoke->InputAt(0);
+  HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc);
+  condition->SetBias(ComparisonBias::kLtBias);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, condition);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
   if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
     SimplifyStringEquals(instruction);
@@ -1568,6 +1579,9 @@
   } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum ||
              instruction->GetIntrinsic() == Intrinsics::kLongSignum) {
     SimplifyCompare(instruction, /* is_signum */ true);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kFloatIsNaN ||
+             instruction->GetIntrinsic() == Intrinsics::kDoubleIsNaN) {
+    SimplifyIsNaN(instruction);
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index cc4b6f6..7905104 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -25,6 +25,13 @@
 
 /**
  * Implements optimizations specific to each instruction.
+ *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
  */
 class InstructionSimplifier : public HOptimization {
  public:
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index db39bc8..316e86b 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -570,6 +570,7 @@
                                    NeedsEnvironmentOrCache(intrinsic),
                                    GetSideEffects(intrinsic),
                                    GetExceptions(intrinsic));
+              MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
             }
           }
         }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3bf3f7f..2ab50bb 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -33,8 +33,8 @@
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
-      : HOptimization(graph, kIntrinsicsRecognizerPassName),
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
         driver_(driver) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 00a158b..ea8669f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1858,8 +1858,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1867,6 +1865,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4140d94..8741fd2 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1618,8 +1618,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1627,6 +1625,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 2294713..c862964 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -113,11 +113,10 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(A0));
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
-      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
-      UNREACHABLE();
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
     }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
@@ -825,6 +824,220 @@
              GetAssembler());
 }
 
+// byte libcore.io.Memory.peekByte(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
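+  // The address is a Java long held in a register pair; on 32-bit MIPS
+  // only its low word is used as the pointer.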
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  __ Lb(out, adr, 0);
+}
+
+// short libcore.io.Memory.peekShort(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lh(out, adr, 0);
+  } else if (IsR2OrNewer()) {
+    // Unlike for words, there are no lhl/lhr instructions to load
+    // unaligned halfwords, so the code loads individual bytes, in case
+    // the address isn't halfword-aligned, and assembles them into a
+    // signed halfword.
+    __ Lb(AT, adr, 1);   // This byte must be sign-extended.
+    __ Lb(out, adr, 0);  // This byte can be either sign-extended, or
+                         // zero-extended because the following
+                         // instruction overwrites the sign bits.
+    __ Ins(out, AT, 8, 24);
+  } else {
+    __ Lbu(AT, adr, 0);  // This byte must be zero-extended.  If it's not,
+                         // the "or" instruction below will destroy the upper
+                         // 24 bits of the final result.
+    __ Lb(out, adr, 1);  // This byte must be sign-extended.
+    __ Sll(out, out, 8);
+    __ Or(out, out, AT);
+  }
+}
+
+// int libcore.io.Memory.peekInt(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lw(out, adr, 0);
+  } else {
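+    // On pre-R6 cores the lwr/lwl pair performs an unaligned word load.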
+    __ Lwr(out, adr, 0);
+    __ Lwl(out, adr, 3);
+  }
+}
+
+// long libcore.io.Memory.peekLong(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out_lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
+  Register out_hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Lw(out_lo, adr, 0);
+    __ Lw(out_hi, adr, 4);
+  } else {
+    __ Lwr(out_lo, adr, 0);
+    __ Lwl(out_lo, adr, 3);
+    __ Lwr(out_hi, adr, 4);
+    __ Lwl(out_hi, adr, 7);
+  }
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// void libcore.io.Memory.pokeByte(long address, byte value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  __ Sb(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeShort(long address, short value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sh(val, adr, 0);
+  } else {
+    // Unlike for words, there are no shl/shr instructions to store
+    // unaligned halfwords, so the code stores individual bytes, in case
+    // the address isn't halfword-aligned.
+    __ Sb(val, adr, 0);
+    __ Srl(AT, val, 8);
+    __ Sb(AT, adr, 1);
+  }
+}
+
+// void libcore.io.Memory.pokeInt(long address, int value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sw(val, adr, 0);
+  } else {
+    __ Swr(val, adr, 0);
+    __ Swl(val, adr, 3);
+  }
+}
+
+// void libcore.io.Memory.pokeLong(long address, long value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val_lo = invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>();
+  Register val_hi = invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Sw(val_lo, adr, 0);
+    __ Sw(val_hi, adr, 4);
+  } else {
+    __ Swr(val_lo, adr, 0);
+    __ Swl(val_lo, adr, 3);
+    __ Swr(val_hi, adr, 4);
+    __ Swl(val_hi, adr, 7);
+  }
+}
+
+// char java.lang.String.charAt(int index)
+void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register idx = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  // TODO: Maybe we can support range check elimination. Overall,
+  //       though, I think it's not worth the cost.
+  // TODO: For simplicity, the index parameter is requested in a
+  //       register, so unlike Quick we do not optimize the code for
+  //       constant indexes (which would save a register).
+
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load the string size
+  __ Lw(TMP, obj, count_offset);
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Take the slow path if idx is too large or negative; the unsigned compare covers both cases.
+  __ Bgeu(idx, TMP, slow_path->GetEntryLabel());
+
+  // out = obj[2*idx].
+  __ Sll(TMP, idx, 1);                  // idx * 2
+  __ Addu(TMP, TMP, obj);               // Address of char at location idx
+  __ Lhu(out, TMP, value_offset);       // Load char at location idx
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
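+// Editor's sketch (not part of the original patch): the single Bgeu above
+// folds both bounds checks into one unsigned compare, since a negative
+// index becomes a huge unsigned value:
+//
+//   bool OutOfBounds(int32_t idx, int32_t length) {
+//     return static_cast<uint32_t>(idx) >= static_cast<uint32_t>(length);
+//   }
+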
 // boolean java.lang.String.equals(Object anObject)
 void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -956,14 +1169,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRint)
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekByte)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekIntNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekLongNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekShortNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeByte)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeIntNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeLongNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeShortNative)
 UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
 UNIMPLEMENTED_INTRINSIC(UnsafeGet)
 UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
@@ -983,7 +1188,6 @@
 UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
 UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
-UNIMPLEMENTED_INTRINSIC(StringCharAt)
 UNIMPLEMENTED_INTRINSIC(StringCompareTo)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
@@ -1016,8 +1220,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1025,6 +1227,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerCompare)
 UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index ac28503..cf3a365 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1764,8 +1764,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1773,6 +1771,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerCompare)
 UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index ab4f6f9..260a877 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -261,7 +261,8 @@
   locations->SetOut(Location::SameAsFirstInput());
   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
   DCHECK(static_or_direct != nullptr);
-  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     // We need addressability for the constant area.
     locations->SetInAt(1, Location::RequiresRegister());
     // We need a temporary to hold the constant.
@@ -276,7 +277,7 @@
   Location output = locations->Out();
 
   DCHECK(output.IsFpuRegister());
-  if (locations->InAt(1).IsValid()) {
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
     DCHECK(locations->InAt(1).IsRegister());
     // We also have a constant area pointer.
     Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -465,7 +466,7 @@
   // NaN handling.
   __ Bind(&nan);
   // Do we have a constant area pointer?
-  if (locations->InAt(2).IsValid()) {
+  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
     DCHECK(locations->InAt(2).IsRegister());
     Register constant_area = locations->InAt(2).AsRegister<Register>();
     if (is_double) {
@@ -510,7 +511,8 @@
   locations->SetOut(Location::SameAsFirstInput());
   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
   DCHECK(static_or_direct != nullptr);
-  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     locations->SetInAt(2, Location::RequiresRegister());
   }
 }
@@ -2633,8 +2635,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -2642,6 +2642,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c9a4344..93e8c00 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2717,10 +2717,10 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a6b4078..33bb2e8 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,6 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
+          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow()) {
           // If `instruction` can throw, we cannot move further instructions
           // that can throw as well.
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 0b5a0f1..bf56f53 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -26,8 +26,9 @@
 
 class LICM : public HOptimization {
  public:
-  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects)
-      : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {}
+  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats),
+        side_effects_(side_effects) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 9fb32f4..d446539 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -79,7 +79,7 @@
     graph_->BuildDominatorTree();
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM(graph_, side_effects).Run();
+    LICM(graph_, side_effects, nullptr).Run();
   }
 
   // General building fields.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca66f63..f36dc6e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,6 +15,8 @@
  */
 #include "nodes.h"
 
+#include <cfloat>
+
 #include "code_generator.h"
 #include "common_dominator.h"
 #include "ssa_builder.h"
@@ -27,6 +29,12 @@
 
 namespace art {
 
+// Enable floating-point static evaluation during constant folding
+// only if all floating-point operations and constants evaluate in the
+// range and precision of the type used (i.e., 32-bit float, 64-bit
+// double).
+static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0);
+
 void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
   ScopedObjectAccess soa(Thread::Current());
   // Create the inexact Object reference type and store it in the HGraph.
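FLT_EVAL_METHOD comes from <cfloat>; 0 means float expressions evaluate in float precision and double in double (e.g. x86-64 SSE), while 2 (classic x87) evaluates both in long double, so build-time folding could disagree with the device's runtime arithmetic. A minimal standalone probe for a given toolchain (editor's sketch):

    #include <cfloat>
    #include <cstdio>

    int main() {
      // 0 enables the folding above; any other value disables it.
      std::printf("FLT_EVAL_METHOD = %d\n", FLT_EVAL_METHOD);
      return 0;
    }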
@@ -1159,6 +1167,12 @@
     return Evaluate(GetInput()->AsIntConstant());
   } else if (GetInput()->IsLongConstant()) {
     return Evaluate(GetInput()->AsLongConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetInput()->IsFloatConstant()) {
+      return Evaluate(GetInput()->AsFloatConstant());
+    } else if (GetInput()->IsDoubleConstant()) {
+      return Evaluate(GetInput()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
@@ -1178,6 +1192,12 @@
     }
   } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
     return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) {
+      return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant());
+    } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) {
+      return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
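For context, this is roughly how a constant-folding pass consumes TryStaticEvaluation; the helper name here is hypothetical, but the replace-then-remove pattern mirrors ART's constant folder:

    void FoldIfConstant(HBinaryOperation* inst) {
      HConstant* constant = inst->TryStaticEvaluation();
      if (constant != nullptr) {
        inst->ReplaceWith(constant);                 // reroute all uses
        inst->GetBlock()->RemoveInstruction(inst);   // drop the dead op
      }
    }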
@@ -1205,6 +1225,20 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
+  switch (rhs) {
+    case ComparisonBias::kNoBias:
+      return os << "no_bias";
+    case ComparisonBias::kGtBias:
+      return os << "gt_bias";
+    case ComparisonBias::kLtBias:
+      return os << "lt_bias";
+    default:
+      LOG(FATAL) << "Unknown ComparisonBias: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
@@ -1386,7 +1420,38 @@
   }
 }
 
-HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
+HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
+  DCHECK_EQ(cursor->GetBlock(), this);
+
+  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
+                                                                    cursor->GetDexPc());
+  new_block->instructions_.first_instruction_ = cursor;
+  new_block->instructions_.last_instruction_ = instructions_.last_instruction_;
+  instructions_.last_instruction_ = cursor->previous_;
+  if (cursor->previous_ == nullptr) {
+    instructions_.first_instruction_ = nullptr;
+  } else {
+    cursor->previous_->next_ = nullptr;
+    cursor->previous_ = nullptr;
+  }
+
+  new_block->instructions_.SetBlockOfInstructions(new_block);
+
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
+  }
+  successors_.clear();
+
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
+    dominated->dominator_ = new_block;
+    new_block->dominated_blocks_.push_back(dominated);
+  }
+  dominated_blocks_.clear();
+  return new_block;
+}
+
+HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1539,6 +1604,20 @@
   }
 }
 
+void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) {
+  DCHECK(Contains(cursor));
+  if (!instruction_list.IsEmpty()) {
+    if (cursor == first_instruction_) {
+      first_instruction_ = instruction_list.first_instruction_;
+    } else {
+      cursor->previous_->next_ = instruction_list.first_instruction_;
+    }
+    instruction_list.last_instruction_->next_ = cursor;
+    instruction_list.first_instruction_->previous_ = cursor->previous_;
+    cursor->previous_ = instruction_list.last_instruction_;
+  }
+}
+
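+// Editor's note (not part of the original patch): splicing {X, Y} before
+// cursor C in the list {A, C}:
+//
+//   before: A <-> C          incoming: X <-> Y
+//   after:  A <-> X <-> Y <-> C
+//
+// The four assignments above rewire A->next, Y->next, X->previous and
+// C->previous; first_instruction_ changes only when the cursor was the head.
+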
 void HInstructionList::Add(const HInstructionList& instruction_list) {
   if (IsEmpty()) {
     first_instruction_ = instruction_list.first_instruction_;
@@ -1781,18 +1860,6 @@
   graph_ = nullptr;
 }
 
-// Create space in `blocks` for adding `number_of_new_blocks` entries
-// starting at location `at`. Blocks after `at` are moved accordingly.
-static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
-                        size_t number_of_new_blocks,
-                        size_t after) {
-  DCHECK_LT(after, blocks->size());
-  size_t old_size = blocks->size();
-  size_t new_size = old_size + number_of_new_blocks;
-  blocks->resize(new_size);
-  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
-}
-
 void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
   DCHECK(block->GetSuccessors().empty());
@@ -1846,7 +1913,8 @@
     DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
-    invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
+    // Note that we add instructions before the invoke only to simplify polymorphic inlining.
+    invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions());
     body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock());
 
     // Replace the invoke with the return value of the inlined graph.
@@ -1864,7 +1932,8 @@
     // with the second half.
     ArenaAllocator* allocator = outer_graph->GetArena();
     HBasicBlock* at = invoke->GetBlock();
-    HBasicBlock* to = at->SplitAfter(invoke);
+    // Note that we split before the invoke only to simplify polymorphic inlining.
+    HBasicBlock* to = at->SplitBeforeForInlining(invoke);
 
     HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
@@ -2030,13 +2099,6 @@
     }
   }
 
-  if (return_value != nullptr) {
-    invoke->ReplaceWith(return_value);
-  }
-
-  // Finally remove the invoke from the caller.
-  invoke->GetBlock()->RemoveInstruction(invoke);
-
   return return_value;
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 18b256f..399afab 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -131,6 +131,7 @@
   void SetBlockOfInstructions(HBasicBlock* block) const;
 
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
+  void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
   // Return the number of instructions in the list. This is an expensive operation.
@@ -345,8 +346,9 @@
   void ComputeTryBlockInformation();
 
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
-  // Returns the instruction used to replace the invoke expression or null if the
-  // invoke is for a void method.
+  // Returns the instruction to replace the invoke expression or null if the
+  // invoke is for a void method. Note that the caller is responsible for replacing
+  // and removing the invoke instruction.
   HInstruction* InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
   // Need to add a couple of blocks to test if the loop body is entered and
@@ -617,6 +619,7 @@
 
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
+  friend class HInliner;             // For the reverse post order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -971,12 +974,15 @@
   // loop and try/catch information.
   HBasicBlock* SplitBefore(HInstruction* cursor);
 
-  // Split the block into two blocks just after `cursor`. Returns the newly
+  // Split the block into two blocks just before `cursor`. Returns the newly
   // created block. Note that this method just updates raw block information,
   // like predecessors, successors, dominators, and instruction list. It does not
   // update the graph, reverse post order, loop information, nor make sure the
   // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitAfter(HInstruction* cursor);
+  HBasicBlock* SplitBeforeForInlining(HInstruction* cursor);
+
+  // Similar to `SplitBeforeForInlining` but does it after `cursor`.
+  HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
   // Split catch block into two blocks after the original move-exception bytecode
   // instruction, or at the beginning if not present. Returns the newly created,
@@ -2062,6 +2068,7 @@
   }
 
   SideEffects GetSideEffects() const { return side_effects_; }
+  void SetSideEffects(SideEffects other) { side_effects_ = other; }
   void AddSideEffects(SideEffects other) { side_effects_.Add(other); }
 
   size_t GetLifetimePosition() const { return lifetime_position_; }
@@ -2100,7 +2107,6 @@
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
-  void SetSideEffects(SideEffects other) { side_effects_ = other; }
 
  private:
   void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
@@ -2393,7 +2399,7 @@
   }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsIntConstant());
+    DCHECK(other->IsIntConstant()) << other->DebugName();
     return other->AsIntConstant()->value_ == value_;
   }
 
@@ -2426,7 +2432,7 @@
   uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsLongConstant());
+    DCHECK(other->IsLongConstant()) << other->DebugName();
     return other->AsLongConstant()->value_ == value_;
   }
 
@@ -2448,6 +2454,92 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
+class HFloatConstant : public HConstant {
+ public:
+  float GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
+  }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsFloatConstant()) << other->DebugName();
+    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
+  }
+  bool IsZero() const OVERRIDE {
+    return value_ == 0.0f;
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(FloatConstant);
+
+ private:
+  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
+  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
+
+  const float value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
+};
+
+class HDoubleConstant : public HConstant {
+ public:
+  double GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsDoubleConstant()) << other->DebugName();
+    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
+  }
+  bool IsZero() const OVERRIDE {
+    return value_ == 0.0;
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(DoubleConstant);
+
+ private:
+  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
+  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
+
+  const double value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+};
+
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
 class HIf : public HTemplateInstruction<1> {
@@ -2649,14 +2741,16 @@
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x`.
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
+  virtual HConstant* Evaluate(HFloatConstant* x) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x) const = 0;
 
   DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
@@ -2719,12 +2813,17 @@
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x` and `y`.
+  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                              HNullConstant* y ATTRIBUTE_UNUSED) const {
+    VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
+    return nullptr;
+  }
   virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0;
   virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED,
@@ -2737,11 +2836,8 @@
     VLOG(compiler) << DebugName() << " is not defined for the (long, int) case.";
     return nullptr;
   }
-  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
-                              HNullConstant* y ATTRIBUTE_UNUSED) const {
-    VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
-    return nullptr;
-  }
+  virtual HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const = 0;
 
   // Returns an input that can legally be used as the right input and is
   // constant, or null.
@@ -2765,6 +2861,8 @@
   kLtBias,  // return -1 for NaN comparisons
 };
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs);
+
 class HCondition : public HBinaryOperation {
  public:
   HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
@@ -2782,7 +2880,7 @@
   virtual IfCondition GetOppositeCondition() const = 0;
 
   bool IsGtBias() const { return bias_ == ComparisonBias::kGtBias; }
-
+  ComparisonBias GetBias() const { return bias_; }
   void SetBias(ComparisonBias bias) { bias_ = bias; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
@@ -2790,17 +2888,34 @@
   }
 
   bool IsFPConditionTrueIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
     return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE);
   }
 
   bool IsFPConditionFalseIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
     return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ);
   }
 
+ protected:
+  template <typename T>
+  int32_t Compare(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t CompareFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compare(x, y);
+  }
+
+  // Return an integer constant containing the result of a condition evaluated at compile time.
+  HIntConstant* MakeConstantCondition(bool value, uint32_t dex_pc) const {
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
+  }
+
  private:
   // Needed if we merge a HCompare into a HCondition.
   ComparisonBias bias_;
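A worked example of the bias handling (editor's sketch; standalone C++ whose answers match what CompareFP folds to):

    #include <cassert>
    #include <cmath>

    int main() {
      const float nan = std::nanf("");
      // gt bias (cmpg-float, used for < and <=): NaN yields +1, so
      // "Compare(x, y) < 0" folds to false. lt bias (cmpl-float, used
      // for > and >=): NaN yields -1, so "Compare(x, y) > 0" folds to
      // false. Java requires exactly these answers:
      assert(!(1.0f < nan) && !(1.0f > nan));
      return 0;
    }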
@@ -2816,17 +2931,25 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(1);
+    return MakeConstantCondition(true, GetDexPc());
+  }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HEqual instruction; evaluate it as
+  // `Compare(x, y) == 0`.
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0),
+                                 GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(Equal);
@@ -2852,17 +2975,24 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(0);
+    return MakeConstantCondition(false, GetDexPc());
+  }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HNotEqual instruction; evaluate it as
+  // `Compare(x, y) != 0`.
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(NotEqual);
@@ -2887,12 +3017,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThan instruction; evaluate it as
+  // `Compare(x, y) < 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThan);
@@ -2917,12 +3054,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) <= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThanOrEqual);
@@ -2947,12 +3091,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThan instruction; evaluate it as
+  // `Compare(x, y) > 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThan);
@@ -2977,12 +3128,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) >= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThanOrEqual);
@@ -3007,14 +3165,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Below);
@@ -3028,7 +3192,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) < MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
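`MakeUnsigned` itself is not shown in this diff; presumably it is a small helper along these lines (an assumption, included for readability):

    #include <type_traits>

    template <typename T>
    static typename std::make_unsigned<T>::type MakeUnsigned(T x) {
      // Same bit pattern, unsigned type: Below/Above and friends are
      // defined over the unsigned interpretation of the constants.
      return static_cast<typename std::make_unsigned<T>::type>(x);
    }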
@@ -3039,14 +3205,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(BelowOrEqual);
@@ -3060,7 +3232,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) <= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
@@ -3071,14 +3245,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Above);
@@ -3092,7 +3272,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) > MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
@@ -3103,14 +3285,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(AboveOrEqual);
@@ -3124,7 +3312,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) >= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAboveOrEqual);
 };
@@ -3149,15 +3339,32 @@
   }
 
   template <typename T>
-  int32_t Compute(T x, T y) const { return x == y ? 0 : x > y ? 1 : -1; }
+  int32_t Compute(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y);
+  }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    // Note that there is no "cmp-int" Dex instruction so we shouldn't
+    // reach this code path when processing a freshly built HIR
+    // graph. However HCompare integer instructions can be synthesized
+    // by the instruction simplifier to implement IntegerCompare and
+    // IntegerSignum intrinsics, so we have to handle this case.
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
@@ -3166,8 +3373,12 @@
 
   ComparisonBias GetBias() const { return bias_; }
 
-  bool IsGtBias() { return bias_ == ComparisonBias::kGtBias; }
-
+  // Does this compare instruction have a "gt bias" (vs an "lt bias")?
+  // Only meaningful for floating-point comparisons.
+  bool IsGtBias() const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    return bias_ == ComparisonBias::kGtBias;
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type) {
     // MIPS64 uses a runtime call for FP comparisons.
@@ -3176,6 +3387,13 @@
 
   DECLARE_INSTRUCTION(Compare);
 
+ protected:
+  // Return an integer constant containing the result of a comparison evaluated at compile time.
+  HIntConstant* MakeConstantComparison(int32_t value, uint32_t dex_pc) const {
+    DCHECK(value == -1 || value == 0 || value == 1) << value;
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
+  }
+
  private:
   const ComparisonBias bias_;
 
@@ -3233,92 +3451,6 @@
   DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
 };
 
-class HFloatConstant : public HConstant {
- public:
-  float GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE {
-    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
-  }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsFloatConstant());
-    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0f;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(FloatConstant);
-
- private:
-  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
-  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
-
-  const float value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
-};
-
-class HDoubleConstant : public HConstant {
- public:
-  double GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsDoubleConstant());
-    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(DoubleConstant);
-
- private:
-  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
-  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
-
-  const double value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
-};
-
 class HNewInstance : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
@@ -3671,6 +3803,7 @@
   // method pointer; otherwise there may be one platform-specific special input,
   // such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+  bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
 
   InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
   void SetOptimizedInvokeType(InvokeType invoke_type) {
@@ -3869,6 +4002,12 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Neg);
 
@@ -3935,6 +4074,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Add);
 
@@ -3960,6 +4107,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Sub);
 
@@ -3987,6 +4142,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Mul);
 
@@ -4003,7 +4166,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -4011,13 +4175,27 @@
     return (y == -1) ? -x : x / y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return x / y;
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
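The `y == -1` special case exists because `kMinInt / -1` overflows in hardware and in C++, while Java defines the result as `kMinInt` itself. A standalone sketch of the rule (hypothetical helper; negation goes through unsigned arithmetic to avoid signed-overflow UB):

    #include <cstdint>

    int32_t JavaDiv(int32_t x, int32_t y) {
      // y == 0 is excluded, as the graph structure guarantees above.
      if (y == -1) {
        // 0u - INT32_MIN wraps back to INT32_MIN, matching Java.
        return static_cast<int32_t>(0u - static_cast<uint32_t>(x));
      }
      return x / y;
    }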
@@ -4040,7 +4218,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -4048,15 +4227,28 @@
     return (y == -1) ? 0 : x % y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return std::fmod(x, y);
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
-
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -4123,6 +4315,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Shl);
 
@@ -4159,6 +4361,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Shr);
 
@@ -4196,6 +4408,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(UShr);
 
@@ -4232,6 +4454,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(And);
 
@@ -4268,6 +4500,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Or);
 
@@ -4304,6 +4546,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Xor);
 
@@ -4342,6 +4594,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Ror);
 
@@ -4408,6 +4670,14 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Not);
 
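
Aside: the additions above complete the Evaluate overload set that constant folding dispatches through, so shift/bitwise nodes now fail loudly for floating-point constants instead of lacking an overload. A minimal sketch of the pattern, with illustrative types that are not ART's:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct IntConstant { int32_t value; };
struct FloatConstant { float value; };

struct ShrLike {
  int32_t Evaluate(IntConstant x, IntConstant y) const {
    return x.value >> (y.value & 31);  // kMaxIntShiftValue-style masking
  }
  int32_t Evaluate(FloatConstant, FloatConstant) const {
    std::fprintf(stderr, "Shr is not defined for float values\n");
    std::abort();  // mirrors LOG(FATAL) + UNREACHABLE()
  }
};
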
@@ -4426,7 +4696,7 @@
   }
 
   template <typename T> bool Compute(T x) const {
-    DCHECK(IsUint<1>(x));
+    DCHECK(IsUint<1>(x)) << x;
     return !x;
   }
 
@@ -4437,6 +4707,14 @@
     LOG(FATAL) << DebugName() << " is not defined for long values";
     UNREACHABLE();
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(BooleanNot);
 
@@ -4784,10 +5062,10 @@
       DCHECK_EQ(GetArray(), other->GetArray());
       DCHECK_EQ(GetIndex(), other->GetIndex());
       if (Primitive::IsIntOrLongType(GetType())) {
-        DCHECK(Primitive::IsFloatingPointType(other->GetType()));
+        DCHECK(Primitive::IsFloatingPointType(other->GetType())) << other->GetType();
       } else {
-        DCHECK(Primitive::IsFloatingPointType(GetType()));
-        DCHECK(Primitive::IsIntOrLongType(other->GetType()));
+        DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+        DCHECK(Primitive::IsIntOrLongType(other->GetType())) << other->GetType();
       }
     }
     return result;
@@ -6002,7 +6280,7 @@
   } else if (constant->IsLongConstant()) {
     return constant->AsLongConstant()->GetValue();
   } else {
-    DCHECK(constant->IsNullConstant());
+    DCHECK(constant->IsNullConstant()) << constant->DebugName();
     return 0;
   }
 }
@@ -6097,6 +6375,18 @@
   DISALLOW_COPY_AND_ASSIGN(SwitchTable);
 };
 
+// Create space in `blocks` for adding `number_of_new_blocks` entries
+// starting at location `after`. Blocks following `after` are moved accordingly.
+inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
+                        size_t number_of_new_blocks,
+                        size_t after) {
+  DCHECK_LT(after, blocks->size());
+  size_t old_size = blocks->size();
+  size_t new_size = old_size + number_of_new_blocks;
+  blocks->resize(new_size);
+  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
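
Aside: a standalone sketch of the MakeRoomFor semantics, with std::vector standing in for ArenaVector and illustrative values. The tail moves up, and the gap after `after` keeps stale copies that callers are expected to overwrite:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> blocks = {10, 20, 30, 40};
  const size_t number_of_new_blocks = 2;
  const size_t after = 1;
  const size_t old_size = blocks.size();
  blocks.resize(old_size + number_of_new_blocks);
  std::copy_backward(blocks.begin() + after + 1, blocks.begin() + old_size, blocks.end());
  // The tail {30, 40} now lives at indices 4..5; indices 2..3 are free for the
  // new blocks.
  assert(blocks[4] == 30 && blocks[5] == 40);
  return 0;
}
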
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 12b748b..b1891c9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -505,12 +505,12 @@
       graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
@@ -519,7 +519,7 @@
       graph, stats, "instruction_simplifier_after_bce");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
-  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -651,7 +651,7 @@
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
-    compiler_driver->GetVerifiedMethod(&dex_file, method_idx), dex_cache);
+    nullptr, dex_cache);
 
   bool requires_barrier = dex_compilation_unit.IsConstructor()
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 52a7b10..179004b 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -56,6 +56,10 @@
   kMonomorphicCall,
   kPolymorphicCall,
   kMegamorphicCall,
+  kBooleanSimplified,
+  kIntrinsicRecognized,
+  kLoopInvariantMoved,
+  kSelectGenerated,
   kLastStat
 };
 
@@ -124,7 +128,11 @@
       case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
       case kMonomorphicCall: name = "MonomorphicCall"; break;
       case kPolymorphicCall: name = "PolymorphicCall"; break;
-      case kMegamorphicCall: name = "kMegamorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
+      case kBooleanSimplified: name = "BooleanSimplified"; break;
+      case kIntrinsicRecognized: name = "IntrinsicRecognized"; break;
+      case kLoopInvariantMoved: name = "LoopInvariantMoved"; break;
+      case kSelectGenerated: name = "SelectGenerated"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 324d84f..0ad104e 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -138,15 +138,7 @@
   }
 
   if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
-    if (GetGraph()->GetInstructionSet() == kX86) {
-      // Long values and long condition inputs result in 8 required core registers.
-      // We don't have that many on x86. Materialize the condition in such case.
-      return user->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->IsConstant();
-    } else {
-      return true;
-    }
+    return true;
   }
 
   return false;
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 105b30a..e52476e 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -141,6 +141,8 @@
       block->MergeWith(merge_block);
     }
 
+    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
     // entry block. Any following blocks would have had the join block
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index f9d6d4d..c6dca58 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -47,8 +47,8 @@
 
 class HSelectGenerator : public HOptimization {
  public:
-  explicit HSelectGenerator(HGraph* graph)
-    : HOptimization(graph, kSelectGeneratorPassName) {}
+  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 43f2499..09ca8b7 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@
   return true;
 }
 
+static bool HasAliasInEnvironments(HInstruction* instruction) {
+  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+       !use_it.Done();
+       use_it.Advance()) {
+    HEnvironment* use = use_it.Current()->GetUser();
+    HUseListNode<HEnvironment*>* next = use_it.Current()->GetNext();
+    if (next != nullptr && next->GetUser() == use) {
+      return true;
+    }
+  }
+
+  if (kIsDebugBuild) {
+    // Do a quadratic search to ensure same environment uses are next
+    // to each other.
+    for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+         !use_it.Done();
+         use_it.Advance()) {
+      HUseListNode<HEnvironment*>* current = use_it.Current();
+      HUseListNode<HEnvironment*>* next = current->GetNext();
+      while (next != nullptr) {
+        DCHECK(next->GetUser() != current->GetUser());
+        next = next->GetNext();
+      }
+    }
+  }
+  return false;
+}
+
 void SsaBuilder::RemoveRedundantUninitializedStrings() {
   if (GetGraph()->IsDebuggable()) {
     // Do not perform the optimization for consistency with the interpreter
@@ -433,7 +461,7 @@
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
-    if (!new_instance->HasNonEnvironmentUses()) {
+    if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
       new_instance->ReplaceWith(GetGraph()->GetNullConstant());
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
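
Aside: HasAliasInEnvironments relies on uses by the same environment sitting next to each other in the use list, an invariant the debug block above verifies quadratically. A minimal sketch of that adjacency check, with a hypothetical container standing in for ART's use list:

#include <cstddef>
#include <vector>

bool HasAdjacentDuplicateUser(const std::vector<const void*>& env_users) {
  for (size_t i = 0; i + 1 < env_users.size(); ++i) {
    if (env_users[i] == env_users[i + 1]) {
      return true;  // one environment references the instruction twice
    }
  }
  return false;
}
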
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ac9c097..6fd65ee 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -426,6 +426,16 @@
   EmitI(0x23, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x22, rs, rt, imm16);
+}
+
+void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x26, rs, rt, imm16);
+}
+
 void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x24, rs, rt, imm16);
 }
@@ -465,6 +475,16 @@
   EmitI(0x2b, rs, rt, imm16);
 }
 
+void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2a, rs, rt, imm16);
+}
+
+void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2e, rs, rt, imm16);
+}
+
 void MipsAssembler::Slt(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x2a);
 }
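
Aside: Lwl/Lwr (and Swl/Swr) exist for unaligned word accesses on pre-R6 MIPS, which is why each emitter asserts CHECK(!IsR6()). A hedged sketch of the classic little-endian pairing; the include path and the register names (T0, A0, A1) are assumptions about the tree layout and the mips::Register enum:

#include "utils/mips/assembler_mips.h"

void EmitUnalignedWordCopy(art::mips::MipsAssembler* assembler) {
  using art::mips::A0;
  using art::mips::A1;
  using art::mips::T0;
  assembler->Lwr(T0, A0, 0);  // low bytes from the possibly unaligned source
  assembler->Lwl(T0, A0, 3);  // high bytes, merged into T0
  assembler->Swr(T0, A1, 0);  // low bytes to the possibly unaligned destination
  assembler->Swl(T0, A1, 3);  // high bytes
}
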
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 01c6490..2262af4 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -162,6 +162,8 @@
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
   void Lw(Register rt, Register rs, uint16_t imm16);
+  void Lwl(Register rt, Register rs, uint16_t imm16);
+  void Lwr(Register rt, Register rs, uint16_t imm16);
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
   void Lui(Register rt, uint16_t imm16);
@@ -172,6 +174,8 @@
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
   void Sw(Register rt, Register rs, uint16_t imm16);
+  void Swl(Register rt, Register rs, uint16_t imm16);
+  void Swr(Register rt, Register rs, uint16_t imm16);
 
   void Slt(Register rd, Register rs, Register rt);
   void Sltu(Register rd, Register rs, Register rt);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 5fc3dee..9e27f07 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -335,6 +335,18 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "Nor");
 }
 
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPSTest, Movz) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movz, "movz ${reg1}, ${reg2}, ${reg3}"), "Movz");
+}
+
+TEST_F(AssemblerMIPSTest, Movn) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movn, "movn ${reg1}, ${reg2}, ${reg3}"), "Movn");
+}
+
 TEST_F(AssemblerMIPSTest, Seb) {
   DriverStr(RepeatRR(&mips::MipsAssembler::Seb, "seb ${reg1}, ${reg2}"), "Seb");
 }
@@ -363,6 +375,10 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "Srlv");
 }
 
+TEST_F(AssemblerMIPSTest, Rotrv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Rotrv, "rotrv ${reg1}, ${reg2}, ${reg3}"), "rotrv");
+}
+
 TEST_F(AssemblerMIPSTest, Srav) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
 }
@@ -405,6 +421,14 @@
   DriverStr(expected, "Ext");
 }
 
+TEST_F(AssemblerMIPSTest, ClzR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::ClzR2, "clz ${reg1}, ${reg2}"), "clzR2");
+}
+
+TEST_F(AssemblerMIPSTest, CloR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::CloR2, "clo ${reg1}, ${reg2}"), "cloR2");
+}
+
 TEST_F(AssemblerMIPSTest, Lb) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
 }
@@ -413,10 +437,18 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "Lh");
 }
 
+TEST_F(AssemblerMIPSTest, Lwl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwl, -16, "lwl ${reg1}, {imm}(${reg2})"), "Lwl");
+}
+
 TEST_F(AssemblerMIPSTest, Lw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "Lw");
 }
 
+TEST_F(AssemblerMIPSTest, Lwr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwr, -16, "lwr ${reg1}, {imm}(${reg2})"), "Lwr");
+}
+
 TEST_F(AssemblerMIPSTest, Lbu) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "Lbu");
 }
@@ -445,10 +477,18 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "Sh");
 }
 
+TEST_F(AssemblerMIPSTest, Swl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swl, -16, "swl ${reg1}, {imm}(${reg2})"), "Swl");
+}
+
 TEST_F(AssemblerMIPSTest, Sw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "Sw");
 }
 
+TEST_F(AssemblerMIPSTest, Swr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swr, -16, "swr ${reg1}, {imm}(${reg2})"), "Swr");
+}
+
 TEST_F(AssemblerMIPSTest, Slt) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "Slt");
 }
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 77f8d6c..dfc379f 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -55,20 +55,42 @@
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain,art/compiler,target,debug,$(dex2oat_target_arch)))
 endif
 
+# Note: the order is important because of static linking resolution.
+DEX2OAT_STATIC_DEPENDENCIES := \
+  libziparchive-host \
+  libnativehelper \
+  libnativebridge \
+  libnativeloader \
+  libsigchain_dummy \
+  libvixl \
+  liblog \
+  libz \
+  libbacktrace \
+  libLLVMObject \
+  libLLVMBitReader \
+  libLLVMMC \
+  libLLVMMCParser \
+  libLLVMCore \
+  libLLVMSupport \
+  libcutils \
+  libunwindbacktrace \
+  libutils \
+  libbase \
+  liblz4 \
+  liblzma
+
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,debug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index f922687..428266f 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -306,13 +306,17 @@
 
   { kITypeMask, 32u << kOpcodeShift, "lb", "TO", },
   { kITypeMask, 33u << kOpcodeShift, "lh", "TO", },
+  { kITypeMask, 34u << kOpcodeShift, "lwl", "TO", },
   { kITypeMask, 35u << kOpcodeShift, "lw", "TO", },
   { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", },
   { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", },
+  { kITypeMask, 38u << kOpcodeShift, "lwr", "TO", },
   { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", },
   { kITypeMask, 40u << kOpcodeShift, "sb", "TO", },
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
+  { kITypeMask, 42u << kOpcodeShift, "swl", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
+  { kITypeMask, 46u << kOpcodeShift, "swr", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
   { kJTypeMask, 50u << kOpcodeShift, "bc", "P" },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
diff --git a/runtime/Android.mk b/runtime/Android.mk
index e9f7add..947de8a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -238,6 +238,7 @@
 # (empty) body is called.
 JIT_DEBUG_REGISTER_CODE_LDFLAGS := -Wl,--keep-unique,__jit_debug_register_code
 LIBART_TARGET_LDFLAGS_arm    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_arm64  := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 LIBART_TARGET_LDFLAGS_x86    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 LIBART_TARGET_LDFLAGS_x86_64 := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 JIT_DEBUG_REGISTER_CODE_LDFLAGS :=
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 078a978..f3e8d6b 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -132,6 +132,11 @@
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
+  // Returns true if this method might be copied from another class.
+  bool MightBeCopied() {
+    return IsMiranda() || IsDefault() || IsDefaultConflicting();
+  }
+
   bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 771b2d0..a4b38ea 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -222,6 +222,10 @@
 }
 
 ArenaPool::~ArenaPool() {
+  ReclaimMemory();
+}
+
+void ArenaPool::ReclaimMemory() {
   while (free_arenas_ != nullptr) {
     auto* arena = free_arenas_;
     free_arenas_ = free_arenas_->next_;
@@ -229,6 +233,11 @@
   }
 }
 
+void ArenaPool::LockReclaimMemory() {
+  MutexLock lock(Thread::Current(), lock_);
+  ReclaimMemory();
+}
+
 Arena* ArenaPool::AllocArena(size_t size) {
   Thread* self = Thread::Current();
   Arena* ret = nullptr;
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 36334c4..8a96571 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -276,6 +276,8 @@
   Arena* AllocArena(size_t size) REQUIRES(!lock_);
   void FreeArenaChain(Arena* first) REQUIRES(!lock_);
   size_t GetBytesAllocated() const REQUIRES(!lock_);
+  void ReclaimMemory() NO_THREAD_SAFETY_ANALYSIS;
+  void LockReclaimMemory() REQUIRES(!lock_);
   // Trim the maps in arenas by madvising, used by JIT to reduce memory usage. This only works
   // use_malloc is false.
   void TrimMaps() REQUIRES(!lock_);
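
Aside: a hedged usage sketch of the two reclaim entry points declared above. The unlocked ReclaimMemory is for single-owner contexts such as the destructor; LockReclaimMemory takes lock_ so the pool can be trimmed while other threads may still allocate:

void TrimPool(art::ArenaPool* pool) {
  // Safe while other threads may call AllocArena/FreeArenaChain concurrently.
  pool->LockReclaimMemory();
  // By contrast, ~ArenaPool() calls ReclaimMemory() directly, since no other
  // thread can touch the pool during destruction.
}
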
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index de46b0c..8aaeaac 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -37,6 +37,7 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
+  bool collector;
   bool compiler;
   bool deopt;
   bool gc;
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 82a5f96..6972b3e 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1009,10 +1009,6 @@
     DCHECK(alloc_tracker_lock_ == nullptr);
     alloc_tracker_lock_ = new Mutex("AllocTracker lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kInterpreterStringInitMapLock);
-    DCHECK(interpreter_string_init_map_lock_ == nullptr);
-    interpreter_string_init_map_lock_ = new Mutex("Interpreter String initializer reference map lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
     DCHECK(thread_list_lock_ == nullptr);
     thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index f674a6f..e72f2a2 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -102,7 +102,6 @@
   kMonitorListLock,
   kJniLoadLibraryLock,
   kThreadListLock,
-  kInterpreterStringInitMapLock,
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index 1236585..bd19d00 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -201,20 +201,29 @@
 template <typename T>
 class ArenaDelete {
   static constexpr uint8_t kMagicFill = 0xCE;
- public:
-  void operator()(T* ptr) const {
-    ptr->~T();
+
+ protected:
+  // Used for variable-sized objects such as RegisterLine.
+  ALWAYS_INLINE void ProtectMemory(T* ptr, size_t size) const {
     if (RUNNING_ON_MEMORY_TOOL > 0) {
-      // Writing to the memory will fail if it we already destroyed the pointer with
+      // Writing to the memory will fail if we already destroyed the pointer with
       // DestroyOnlyDelete since we make it no access.
-      memset(ptr, kMagicFill, sizeof(T));
-      MEMORY_TOOL_MAKE_NOACCESS(ptr, sizeof(T));
+      memset(ptr, kMagicFill, size);
+      MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
     } else if (kIsDebugBuild) {
       CHECK(ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) == ArenaFreeTag::kUsed)
           << "Freeing invalid object " << ptr;
       ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) = ArenaFreeTag::kFree;
       // Write a magic value to try and catch use after free error.
-      memset(ptr, kMagicFill, sizeof(T));
+      memset(ptr, kMagicFill, size);
+    }
+  }
+
+ public:
+  void operator()(T* ptr) const {
+    if (ptr != nullptr) {
+      ptr->~T();
+      ProtectMemory(ptr, sizeof(T));
     }
   }
 };
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5278d1b..936c988 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -759,7 +759,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
     CHECK(m->GetDeclaringClass() == nullptr) << PrettyMethod(m);
-  } else if (m->IsMiranda()) {
+  } else if (m->MightBeCopied()) {
     CHECK(m->GetDeclaringClass() != nullptr) << PrettyMethod(m);
   } else if (expected_class != nullptr) {
     CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
@@ -1137,18 +1137,18 @@
 
   virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
     GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(sizeof(void*));
-    const bool is_miranda = method->IsMiranda();
+    const bool maybe_copied = method->MightBeCopied();
     if (resolved_types != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
             reinterpret_cast<const uint8_t*>(resolved_types) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_types << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
                                          sizeof(void*));
@@ -1157,15 +1157,15 @@
     ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(sizeof(void*));
     if (resolved_methods != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
               reinterpret_cast<const uint8_t*>(resolved_methods) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_methods << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
                                            sizeof(void*));
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3a0f3e5..5c3029a 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -263,7 +263,7 @@
     for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.IsMiranda() || method.IsDefault() || method.IsDefaultConflicting());
+      EXPECT_TRUE(method.MightBeCopied());
       EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
           << "declaring class: " << PrettyClass(method.GetDeclaringClass());
       EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
@@ -1225,12 +1225,12 @@
   dex_cache->SetLocation(location.Get());
   const DexFile* old_dex_file = dex_cache->GetDexFile();
 
-  DexFile* dex_file = new DexFile(old_dex_file->Begin(),
-                                  old_dex_file->Size(),
-                                  location->ToModifiedUtf8(),
-                                  0u,
-                                  nullptr,
-                                  nullptr);
+  std::unique_ptr<DexFile> dex_file(new DexFile(old_dex_file->Begin(),
+                                                old_dex_file->Size(),
+                                                location->ToModifiedUtf8(),
+                                                0u,
+                                                nullptr,
+                                                nullptr));
   {
     WriterMutexLock mu(soa.Self(), *class_linker->DexLock());
     // Check that inserting with a UTF16 name works.
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 369e408..83e5bad 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -34,11 +34,7 @@
 
 const char* AllocRecord::GetClassDescriptor(std::string* storage) const {
   // klass_ could contain null only if we implement class unloading.
-  if (UNLIKELY(klass_.IsNull())) {
-    return "null";
-  } else {
-    return klass_.Read()->GetDescriptor(storage);
-  }
+  return klass_.IsNull() ? "null" : klass_.Read()->GetDescriptor(storage);
 }
 
 void AllocRecordObjectMap::SetProperties() {
@@ -105,8 +101,19 @@
   size_t count = recent_record_max_;
   // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
   // klass_ fields as strong roots.
-  for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
-    buffered_visitor.VisitRootIfNonNull(it->second->GetClassGcRoot());
+  for (auto it = entries_.rbegin(), end = entries_.rend(); it != end; ++it) {
+    AllocRecord* record = it->second;
+    if (count > 0) {
+      buffered_visitor.VisitRootIfNonNull(record->GetClassGcRoot());
+      --count;
+    }
+    // Visit all of the stack frames to make sure no methods in the stack traces get unloaded by
+    // class unloading.
+    for (size_t i = 0, depth = record->GetDepth(); i < depth; ++i) {
+      const AllocRecordStackTraceElement& element = record->StackElement(i);
+      DCHECK(element.GetMethod() != nullptr);
+      element.GetMethod()->VisitRoots(buffered_visitor, sizeof(void*));
+    }
   }
 }
 
@@ -131,12 +138,7 @@
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0, count = 0;
   // Only the first (size - recent_record_max_) number of records can be deleted.
-  size_t delete_bound;
-  if (entries_.size() <= recent_record_max_) {
-    delete_bound = 0;
-  } else {
-    delete_bound = entries_.size() - recent_record_max_;
-  }
+  const size_t delete_bound = std::max(entries_.size(), recent_record_max_) - recent_record_max_;
   for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
     ++count;
     // This does not need a read barrier because this is called by GC.
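
Aside on the clamped subtraction above: with unsigned sizes, entries_.size() - recent_record_max_ would wrap around whenever fewer than recent_record_max_ records exist. A tiny check with illustrative values:

#include <algorithm>
#include <cassert>
#include <cstddef>

int main() {
  size_t size = 3, max = 5;
  assert(std::max(size, max) - max == 0u);  // too few records: delete nothing
  size = 8;
  assert(std::max(size, max) - max == 3u);  // only the oldest 3 are deletable
  return 0;
}
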
@@ -187,7 +189,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         trace(trace_in),
-        depth(0),
         max_depth(max) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
@@ -209,7 +210,7 @@
   }
 
   AllocRecordStackTrace* trace;
-  size_t depth;
+  size_t depth = 0u;
   const size_t max_depth;
 };
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8b125dd..2c487fe 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -58,10 +58,16 @@
       page_release_mode_(page_release_mode),
       page_release_size_threshold_(page_release_size_threshold),
       is_running_on_memory_tool_(running_on_memory_tool) {
+  DCHECK_ALIGNED(base, kPageSize);
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
   CHECK_ALIGNED(page_release_size_threshold_, kPageSize);
+  // Zero the memory explicitly (don't rely on the mem map being zero-initialized).
+  if (!kMadviseZeroes) {
+    memset(base_, 0, max_capacity);
+  }
+  CHECK_EQ(madvise(base_, max_capacity, MADV_DONTNEED), 0);
   if (!initialized_) {
     Initialize();
   }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a472a8b..b12cb5b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -192,6 +192,7 @@
         Verify();
       }
       DCHECK(slot != nullptr);
+      DCHECK(slot->Next() == nullptr);
       Slot** headp = reinterpret_cast<Slot**>(&head_);
       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
       Slot* old_head = *headp;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8e1b7f4..d393f0b 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1622,7 +1622,9 @@
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
-  to_ref->VisitReferences(visitor, visitor);
+  // Disable the read barrier for performance reasons.
+  to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
 }
 
 // Process a field.
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index b60426d..c9ac435 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -66,6 +66,10 @@
     return end_;
   }
 
+  size_t Size() const {
+    return size_;
+  }
+
  private:
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
index 8f9a9e2..26da4ca 100644
--- a/runtime/gc/collector/immune_spaces.cc
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -18,6 +18,7 @@
 
 #include "gc/space/space-inl.h"
 #include "mirror/object.h"
+#include "oat_file.h"
 
 namespace art {
 namespace gc {
@@ -45,11 +46,16 @@
       space::ImageSpace* image_space = space->AsImageSpace();
       // Update the end to include the other non-heap sections.
       space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
-      uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin());
-      uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd());
-      if (space_end == oat_begin) {
-        DCHECK_GE(oat_end, oat_begin);
-        space_end = oat_end;
+      // For the app image case, GetOatFileBegin is where the oat file was mapped during image
+      // creation; the actual oat file could be somewhere else.
+      const OatFile* const image_oat_file = image_space->GetOatFile();
+      if (image_oat_file != nullptr) {
+        uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_oat_file->Begin());
+        uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_oat_file->End());
+        if (space_end == oat_begin) {
+          DCHECK_GE(oat_end, oat_begin);
+          space_end = oat_end;
+        }
       }
     }
     if (cur_begin == 0u) {
@@ -71,6 +77,8 @@
   }
   largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
   largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+  VLOG(collector) << "Immune region " << largest_immune_region_.Begin() << "-"
+                  << largest_immune_region_.End();
 }
 
 void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index ea290dd..56838f5 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -72,17 +72,31 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
 }
 
+class DummyOatFile : public OatFile {
+ public:
+  DummyOatFile(uint8_t* begin, uint8_t* end) : OatFile("Location", /*is_executable*/ false) {
+    begin_ = begin;
+    end_ = end;
+  }
+};
+
 class DummyImageSpace : public space::ImageSpace {
  public:
-  DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap)
+  DummyImageSpace(MemMap* map,
+                  accounting::ContinuousSpaceBitmap* live_bitmap,
+                  std::unique_ptr<DummyOatFile>&& oat_file)
       : ImageSpace("DummyImageSpace",
                    /*image_location*/"",
                    map,
                    live_bitmap,
-                   map->End()) {}
+                   map->End()) {
+    oat_file_ = std::move(oat_file);
+    oat_file_non_owned_ = oat_file_.get();
+  }
 
-  // OatSize is how large the oat file is after the image.
-  static DummyImageSpace* Create(size_t size, size_t oat_size) {
+  // Size is the size of the image space; oat_offset is where the oat file is located
+  // after the end of the image space; oat_size is the size of the oat file.
+  static DummyImageSpace* Create(size_t size, size_t oat_offset, size_t oat_size) {
     std::string error_str;
     std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
                                                      nullptr,
@@ -100,6 +114,9 @@
     if (live_bitmap == nullptr) {
       return nullptr;
     }
+    // The actual mapped oat file may not be directly after the image for the app image case.
+    std::unique_ptr<DummyOatFile> oat_file(new DummyOatFile(map->End() + oat_offset,
+                                                            map->End() + oat_offset + oat_size));
     // Create image header.
     ImageSection sections[ImageHeader::kSectionCount];
     new (map->Begin()) ImageHeader(
@@ -108,6 +125,7 @@
         sections,
         /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
         /*oat_checksum*/0u,
+        // The oat file data in the header is always right after the image space.
         /*oat_file_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
@@ -121,7 +139,7 @@
         /*is_pic*/false,
         ImageHeader::kStorageModeUncompressed,
         /*storage_size*/0u);
-    return new DummyImageSpace(map.release(), live_bitmap.release());
+    return new DummyImageSpace(map.release(), live_bitmap.release(), std::move(oat_file));
   }
 };
 
@@ -129,7 +147,9 @@
   ImmuneSpaces spaces;
   constexpr size_t image_size = 123 * kPageSize;
   constexpr size_t image_oat_size = 321 * kPageSize;
-  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size));
+  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size,
+                                                                       0,
+                                                                       image_oat_size));
   ASSERT_TRUE(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
   EXPECT_EQ(image_header.GetImageSize(), image_size);
@@ -150,6 +170,18 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
             image_space->Begin());
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+  // Check that appending with a gap between the map does not include the oat file.
+  image_space.reset(DummyImageSpace::Create(image_size, kPageSize, image_oat_size));
+  spaces.Reset();
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  // Size should be equal; we should not add the oat file since it is not adjacent to the image
+  // space.
+  EXPECT_EQ(spaces.GetLargestImmuneRegion().Size(), image_size);
 }
 
 }  // namespace collector
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3c9312f..a656fb8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -845,6 +845,13 @@
 void Heap::IncrementDisableThreadFlip(Thread* self) {
   // Supposed to be called by mutators. If thread_flip_running_ is true, block. Otherwise, go ahead.
   CHECK(kUseReadBarrier);
+  bool is_nested = self->GetDisableThreadFlipCount() > 0;
+  self->IncrementDisableThreadFlipCount();
+  if (is_nested) {
+    // If this is a nested JNI critical section enter, we don't need to wait or increment the global
+    // counter. The global counter is incremented only once for a thread for the outermost enter.
+    return;
+  }
   ScopedThreadStateChange tsc(self, kWaitingForGcThreadFlip);
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
@@ -867,10 +874,20 @@
   // Supposed to be called by mutators. Decrement disable_thread_flip_count_ and potentially wake up
   // the GC waiting before doing a thread flip.
   CHECK(kUseReadBarrier);
+  self->DecrementDisableThreadFlipCount();
+  bool is_outermost = self->GetDisableThreadFlipCount() == 0;
+  if (!is_outermost) {
+    // If this is not an outermost JNI critical exit, we don't need to decrement the global counter.
+    // The global counter is decremented only once for a thread for the outermost exit.
+    return;
+  }
   MutexLock mu(self, *thread_flip_lock_);
   CHECK_GT(disable_thread_flip_count_, 0U);
   --disable_thread_flip_count_;
-  thread_flip_cond_->Broadcast(self);
+  if (disable_thread_flip_count_ == 0) {
+    // Potentially notify the GC thread blocking to begin a thread flip.
+    thread_flip_cond_->Broadcast(self);
+  }
 }
 
 void Heap::ThreadFlipBegin(Thread* self) {
@@ -882,7 +899,8 @@
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   CHECK(!thread_flip_running_);
-  // Set this to true before waiting so that a new mutator entering a JNI critical won't starve GC.
+  // Set this to true before waiting so that frequent JNI critical enter/exits won't starve
+  // GC. This is like the writer preference of a reader-writer lock.
   thread_flip_running_ = true;
   while (disable_thread_flip_count_ > 0) {
     has_waited = true;
@@ -904,6 +922,7 @@
   MutexLock mu(self, *thread_flip_lock_);
   CHECK(thread_flip_running_);
   thread_flip_running_ = false;
+  // Potentially notify mutator threads blocking to enter a JNI critical section.
   thread_flip_cond_->Broadcast(self);
 }
 
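
Aside: a minimal sketch of the nesting protocol introduced above, with hypothetical stand-ins for the Thread accessors. Only the outermost enter/exit touches the global counter, so a nested JNI critical enter never blocks on the flip condition:

struct ThreadState { int disable_thread_flip_count = 0; };

void EnterCritical(ThreadState* self, int* global_disable_count) {
  const bool is_nested = self->disable_thread_flip_count > 0;
  ++self->disable_thread_flip_count;
  if (is_nested) {
    return;  // the outermost enter already registered this thread
  }
  ++*global_disable_count;  // held under thread_flip_lock_ in the real code
}

void ExitCritical(ThreadState* self, int* global_disable_count) {
  --self->disable_thread_flip_count;
  if (self->disable_thread_flip_count > 0) {
    return;  // still nested: leave the global counter alone
  }
  if (--*global_disable_count == 0) {
    // The real code broadcasts thread_flip_cond_ so a waiting GC can start.
  }
}
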
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c02e2d3..a181e23 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -1113,6 +1113,8 @@
   // Used to synchronize between JNI critical calls and the thread flip of the CC collector.
   Mutex* thread_flip_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::unique_ptr<ConditionVariable> thread_flip_cond_ GUARDED_BY(thread_flip_lock_);
+  // This counter keeps track of how many threads are currently in a JNI critical section. This is
+  // incremented once per thread even with nested enters.
   size_t disable_thread_flip_count_ GUARDED_BY(thread_flip_lock_);
   bool thread_flip_running_ GUARDED_BY(thread_flip_lock_);
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 0c06c38..894ce9a 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -676,13 +676,17 @@
         dest_(dest),
         length_(length) {}
 
-  bool ContainsSource(uintptr_t address) const {
+  bool InSource(uintptr_t address) const {
     return address - source_ < length_;
   }
 
+  bool InDest(uintptr_t address) const {
+    return address - dest_ < length_;
+  }
+
   // Translate a source address to the destination space.
   uintptr_t ToDest(uintptr_t address) const {
-    DCHECK(ContainsSource(address));
+    DCHECK(InSource(address));
     return address + Delta();
   }
 
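
Aside: InSource/InDest use the single-compare range check. With unsigned arithmetic, address - source_ wraps to a huge value when address < source_, so one comparison covers both bounds. Illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t source = 0x1000, length = 0x100;
  assert(uintptr_t{0x1000} - source < length);     // first byte: inside
  assert(uintptr_t{0x10ff} - source < length);     // last byte: inside
  assert(!(uintptr_t{0x0fff} - source < length));  // below begin: wraps, fails
  assert(!(uintptr_t{0x1100} - source < length));  // one past end: fails
  return 0;
}
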
@@ -724,24 +728,28 @@
   template <typename T>
   ALWAYS_INLINE T* ForwardObject(T* src) const {
     const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_image_.ContainsSource(uint_src)) {
+    if (boot_image_.InSource(uint_src)) {
       return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
     }
-    if (app_image_.ContainsSource(uint_src)) {
+    if (app_image_.InSource(uint_src)) {
       return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
     }
+    // Since we are fixing up the app image, there should only be pointers to the app image and
+    // boot image.
+    DCHECK(src == nullptr) << reinterpret_cast<const void*>(src);
     return src;
   }
 
   // Return the relocated address of a code pointer (contained by an oat file).
   ALWAYS_INLINE const void* ForwardCode(const void* src) const {
     const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_oat_.ContainsSource(uint_src)) {
+    if (boot_oat_.InSource(uint_src)) {
      return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
     }
-    if (app_oat_.ContainsSource(uint_src)) {
+    if (app_oat_.InSource(uint_src)) {
       return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
     }
+    DCHECK(src == nullptr) << src;
     return src;
   }
 
@@ -766,6 +774,11 @@
   template<typename... Args>
   explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
 
+  // Must be called on pointers that have already been relocated to the destination space.
+  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+  }
+
   template <typename T>
   T* operator()(T* obj) const {
     return ForwardObject(obj);
@@ -816,7 +829,10 @@
 class FixupObjectVisitor : public FixupVisitor {
  public:
   template<typename... Args>
-  explicit FixupObjectVisitor(Args... args) : FixupVisitor(args...) {}
+  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* pointer_array_visited,
+                              Args... args)
+      : FixupVisitor(args...),
+        pointer_array_visited_(pointer_array_visited) {}
 
   // Fix up separately since we also need to fix up method entrypoints.
   ALWAYS_INLINE void VisitRootIfNonNull(
@@ -841,6 +857,19 @@
     }
   }
 
+  // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
+  // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
+  template <typename Visitor>
+  void VisitPointerArray(mirror::PointerArray* array, const Visitor& visitor) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    if (array != nullptr &&
+        visitor.IsInAppImage(array) &&
+        !pointer_array_visited_->Test(array)) {
+      array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, sizeof(void*), visitor);
+      pointer_array_visited_->Set(array);
+    }
+  }
+
   // java.lang.ref.Reference visitor.
   void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
@@ -859,11 +888,9 @@
       mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
       klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, sizeof(void*), visitor);
-      // Deal with the arrays.
-      mirror::PointerArray* vtable = klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
-      if (vtable != nullptr) {
-        vtable->Fixup<kVerifyNone, kWithoutReadBarrier>(vtable, sizeof(void*), visitor);
-      }
+      // Deal with the pointer arrays. Use the helper function since multiple classes can reference
+      // the same arrays.
+      VisitPointerArray(klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(), visitor);
       mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
       if (iftable != nullptr) {
         for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
@@ -871,12 +898,15 @@
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
             DCHECK(methods != nullptr);
-            methods->Fixup<kVerifyNone, kWithoutReadBarrier>(methods, sizeof(void*), visitor);
+            VisitPointerArray(methods, visitor);
           }
         }
       }
     }
   }
+
+ private:
+  gc::accounting::ContinuousSpaceBitmap* const pointer_array_visited_;
 };
 
 class ForwardObjectAdapter {
@@ -1010,9 +1040,18 @@
   const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
   uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
   uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
-  // Two pass approach, fix up all classes first, then fix up non class-objects.
-  FixupObjectVisitor fixup_object_visitor(boot_image, boot_oat, app_image, app_oat);
   if (fixup_image) {
+    // Two-pass approach: fix up all classes first, then fix up non-class objects.
+    // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
+    std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
+        gc::accounting::ContinuousSpaceBitmap::Create("Pointer array bitmap",
+                                                      target_base,
+                                                      image_header.GetImageSize()));
+    FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
+                                            boot_image,
+                                            boot_oat,
+                                            app_image,
+                                            app_oat);
     TimingLogger::ScopedTiming timing("Fixup classes", &logger);
     // Fixup class only touches app image classes, don't need the mutator lock since the space is
     // not yet visible to the GC.
@@ -1025,7 +1064,7 @@
     bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
     FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
     // Fixup image roots.
-    CHECK(app_image.ContainsSource(reinterpret_cast<uintptr_t>(
+    CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
         image_header.GetImageRoots<kWithoutReadBarrier>())));
     image_header.RelocateImageObjects(app_image.Delta());
     CHECK_EQ(image_header.GetImageBegin(), target_base);
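
Aside: a hedged sketch of the deduplication the visited bitmap provides. A vtable or method array shared by several classes must be forwarded exactly once; a second visit would relocate already-relocated pointers. The set below is an illustrative stand-in for the ContinuousSpaceBitmap:

#include <unordered_set>

template <typename Array, typename Fixup>
void VisitPointerArrayOnce(Array* array,
                           std::unordered_set<Array*>* visited,
                           const Fixup& fixup) {
  if (array != nullptr && visited->insert(array).second) {
    fixup(array);  // first visit only: forward the contained pointers
  }
}
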
diff --git a/runtime/gc/space/memory_tool_malloc_space-inl.h b/runtime/gc/space/memory_tool_malloc_space-inl.h
index ea8b8aa..6cb246553 100644
--- a/runtime/gc/space/memory_tool_malloc_space-inl.h
+++ b/runtime/gc/space/memory_tool_malloc_space-inl.h
@@ -240,9 +240,9 @@
                     kAdjustForRedzoneInAllocSize,
                     kUseObjSizeForUsable>::MemoryToolMallocSpace(
     MemMap* mem_map, size_t initial_size, Params... params) : S(mem_map, initial_size, params...) {
-  MEMORY_TOOL_MAKE_DEFINED(mem_map->Begin(), initial_size);
-  MEMORY_TOOL_MAKE_UNDEFINED(mem_map->Begin() + initial_size,
-                     mem_map->Size() - initial_size);
+  // Don't want to change the valgrind states of the mem map here as the allocator is already
+  // initialized at this point and that may interfere with what the allocator does internally. Note
+  // that the tail beyond the initial size is mprotected.
 }
 
 template <typename S,
diff --git a/runtime/image.h b/runtime/image.h
index c449e43..146ee00 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -143,6 +143,8 @@
     oat_checksum_ = oat_checksum;
   }
 
+  // The location that the oat file was expected to be at when the image was created. The actual
+  // oat file may be at a different location for application images.
   uint8_t* GetOatFileBegin() const {
     return reinterpret_cast<uint8_t*>(oat_file_begin_);
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 56aeefc..e3cbf53 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -290,6 +290,14 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
+  }
+
+  // Any instrumentation *other* than what is needed for Jit profiling active?
+  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 4fd3c78..a595d33 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -320,12 +320,13 @@
         // No Mterp variant - just use the switch interpreter.
         return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
                                               false);
+      } else if (UNLIKELY(!Runtime::Current()->IsStarted())) {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       } else {
-        const instrumentation::Instrumentation* const instrumentation =
-            Runtime::Current()->GetInstrumentation();
         while (true) {
-          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
-            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
+          // Mterp does not support all instrumentation/debugging.
+          if (MterpShouldSwitchInterpreters()) {
 #if !defined(__clang__)
             return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
 #else
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index cbaa817..3453abc 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -733,39 +733,21 @@
   }
 
   if (string_init && !self->IsExceptionPending()) {
-    // Set the new string result of the StringFactory.
-    shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
-    // Overwrite all potential copies of the original result of the new-instance of string with the
-    // new result of the StringFactory. Use the verifier to find this set of registers.
-    ArtMethod* method = shadow_frame.GetMethod();
-    MethodReference method_ref = method->ToMethodReference();
-    SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr = nullptr;
-    MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap();
-    {
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.find(method_ref);
-      if (it != method_to_string_init_map.end()) {
-        string_init_map_ptr = &it->second;
-      }
-    }
-    if (string_init_map_ptr == nullptr) {
-      SafeMap<uint32_t, std::set<uint32_t>> string_init_map =
-          verifier::MethodVerifier::FindStringInitMap(method);
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.lower_bound(method_ref);
-      if (it == method_to_string_init_map.end() ||
-          method_to_string_init_map.key_comp()(method_ref, it->first)) {
-        it = method_to_string_init_map.PutBefore(it, method_ref, std::move(string_init_map));
-      }
-      string_init_map_ptr = &it->second;
-    }
-    if (string_init_map_ptr->size() != 0) {
-      uint32_t dex_pc = shadow_frame.GetDexPC();
-      auto map_it = string_init_map_ptr->find(dex_pc);
-      if (map_it != string_init_map_ptr->end()) {
-        const std::set<uint32_t>& reg_set = map_it->second;
-        for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-          shadow_frame.SetVRegReference(*set_it, result->GetL());
+    mirror::Object* existing = shadow_frame.GetVRegReference(string_init_vreg_this);
+    if (existing == nullptr) {
+      // If it's null, we come from compiled code that was deoptimized. The compiler
+      // verified there was no alias, so there is nothing else to fix up; just set the
+      // new string result of the StringFactory.
+      shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
+    } else {
+      // Replace the fake string that was allocated with the StringFactory result.
+      for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
+        if (shadow_frame.GetVRegReference(i) == existing) {
+          DCHECK_EQ(shadow_frame.GetVRegReference(i),
+                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
+          shadow_frame.SetVRegReference(i, result->GetL());
+          DCHECK_EQ(shadow_frame.GetVRegReference(i),
+                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
         }
       }
     }
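
The hunk above drops the verifier-driven string-init register map in favor of a direct scan: every vreg still holding the placeholder object from the new-instance of String is redirected to the StringFactory result. A simplified standalone sketch of that replacement loop; ToyFrame and its fields are invented for illustration and stand in for ShadowFrame's reference registers:

    #include <array>
    #include <iostream>

    // Toy stand-in for a ShadowFrame's reference registers (illustration only).
    struct ToyFrame {
      std::array<const void*, 8> refs{};
    };

    // Mirrors the loop above: every slot aliasing the fake "new-instance String"
    // object is overwritten with the StringFactory result.
    void ReplaceStringInitAliases(ToyFrame& f, const void* existing, const void* result) {
      for (auto& r : f.refs) {
        if (r == existing) r = result;
      }
    }

    int main() {
      int fake = 0, real = 1;
      ToyFrame f;
      f.refs[0] = &fake; f.refs[3] = &fake;  // aliases created by earlier moves
      ReplaceStringInitAliases(f, &fake, &real);
      std::cout << (f.refs[0] == &real && f.refs[3] == &real) << '\n';  // prints 1
    }
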
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 949112d..19d971e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -948,11 +948,15 @@
   __attribute__((cold))
   SHARED_REQUIRES(Locks::mutator_lock_);
 
+static inline bool TraceExecutionEnabled() {
+  // Return true to have TraceExecution invoked before each bytecode is executed.
+  return false;
+}
+
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  constexpr bool kTracing = false;
-  if (kTracing) {
+  if (TraceExecutionEnabled()) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
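
Turning the file-local constexpr into a function matters because another translation unit (mterp.cc, later in this patch) now consults the same toggle when choosing the TLS dispatch table. A tiny sketch of the pattern, with invented names; only the shape of the change is taken from the patch:

    #include <iostream>

    // One shared toggle function instead of a constexpr private to one file.
    inline bool TraceEnabled() { return false; }  // flip to true for bring-up logging

    void InitDispatchTable() {  // ~ what InitMterpTls does with the alt ibase
      const char* table = TraceEnabled() ? "alt (tracing) handlers" : "fast handlers";
      std::cout << "using " << table << '\n';
    }

    int main() { InitDispatchTable(); }
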
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 474bc3c..774e167 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -6,17 +6,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -25,10 +37,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    mov${revcmp} rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
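
The MTERP_PROFILE_BRANCHES path above (repeated in the goto, switch, and zcmp handlers below) follows one pattern: on a taken branch, export the PC, call MterpProfileBranch(self, shadow_frame, offset), jump to on-stack replacement if it returns nonzero, and otherwise run the suspend check only for backward branches. A C++ rendering of that taken-branch path under those assumptions; both helper functions here are invented stand-ins:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for the runtime hooks used by the assembly.
    bool ProfileBranch(int32_t offset) { return false; }  // ~ MterpProfileBranch; true => OSR
    bool SuspendOrCheckpointRequested() { return false; } // ~ THREAD_FLAGS test

    enum class Next { kOsr, kSuspendCheck, kDispatch };

    // Profile the branch, bail to OSR if requested, otherwise take the branch;
    // the suspend check fires only on backward (negative) offsets.
    Next TakenBranch(int32_t offset_in_code_units) {
      if (ProfileBranch(offset_in_code_units)) return Next::kOsr;
      int32_t byte_offset = offset_in_code_units * 2;  // code units -> bytes
      if (byte_offset < 0 && SuspendOrCheckpointRequested()) return Next::kSuspendCheck;
      return Next::kDispatch;
    }

    int main() { std::cout << static_cast<int>(TakenBranch(-4)) << '\n'; }
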
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 1dba856..3456a75 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -12,7 +12,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -103,8 +102,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -116,12 +119,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
 
 /*
  * Bail out to reference interpreter.
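
The footer changes above give MterpSuspendCheck a boolean result: after the thread resumes, the environment may have changed (debugger attached, instrumentation enabled), and mterp must then fall back rather than continue. A minimal sketch of that contract, using invented stand-ins for the runtime calls:

    #include <iostream>

    // Stand-ins for the real hooks (illustration only).
    void AllowThreadSuspension() {}
    bool ShouldSwitchInterpreters() { return false; }

    // ~ MterpSuspendCheck: report whether mterp can safely resume afterwards.
    bool SuspendCheck() {
      AllowThreadSuspension();
      return ShouldSwitchInterpreters();
    }

    int main() {
      std::cout << (SuspendCheck() ? "MterpFallback" : "resume dispatch") << '\n';
    }
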
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index b2370bf..298af8a 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -85,6 +85,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -109,14 +112,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
index 7575865..e47dd1b 100644
--- a/runtime/interpreter/mterp/arm/invoke.S
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -14,6 +14,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 9b3632a..6861950 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -6,20 +6,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 2231acd..91639ca 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,17 +5,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index 6b72ff5..e730b52 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,21 +10,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 1e3370e..4c369cb 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,9 +17,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -30,8 +39,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
index dc8a679..12ea248 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
index fd7668d..4799e77 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
index c0edf90..88a13d6 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
index ffeaf9c..78d8bb7 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
index f64c861..f98ec63 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -12,16 +12,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
index dbab08d..840283d 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -6,17 +6,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
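
The CLEAR_SHADOW_PAIR additions in the long-shift handlers above zero the shadow (reference) slots of the destination vreg pair before a wide primitive result is stored, so the GC never scans a stale object pointer there. A toy model of the invariant; ToyFrame and its layout are invented for illustration, the real bookkeeping lives in ShadowFrame:

    #include <array>
    #include <cstdint>
    #include <iostream>

    struct ToyFrame {
      std::array<uint32_t, 8> vregs{};
      std::array<const void*, 8> refs{};  // parallel reference slots

      // ~ CLEAR_SHADOW_PAIR: a wide primitive write clears both reference slots.
      void SetVRegLong(size_t i, int64_t v) {
        vregs[i]     = static_cast<uint32_t>(v);
        vregs[i + 1] = static_cast<uint32_t>(v >> 32);
        refs[i] = nullptr;
        refs[i + 1] = nullptr;
      }
    };

    int main() {
      ToyFrame f;
      int dummy = 0;
      f.refs[2] = &dummy;                  // stale reference from earlier code
      f.SetVRegLong(2, 0x123456789abcLL);  // primitive result of shl-long
      std::cout << (f.refs[2] == nullptr && f.refs[3] == nullptr) << '\n';  // 1
    }
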
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 6e9ef55..800804d 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -6,25 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    mov${revcmp} rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index ecab2ce..ed850fc 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -6,17 +6,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
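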
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -25,11 +36,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index b360539..aae78de 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -10,7 +10,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -99,8 +98,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -120,10 +122,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm x2, xINST, 0, 31
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 351a607..7223750 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -87,6 +87,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -114,14 +117,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
index ff1974c..7a32df7 100644
--- a/runtime/interpreter/mterp/arm64/invoke.S
+++ b/runtime/interpreter/mterp/arm64/invoke.S
@@ -9,11 +9,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      $helper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index db98a45..7e2f6a9 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -6,23 +6,20 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index ff66a23..b2b9924 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -5,19 +5,18 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index 8a6980e..b785857 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -10,23 +10,20 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
index 165c730..88533bd 100644
--- a/runtime/interpreter/mterp/arm64/op_iget.S
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
     /*
      * General instance field get.
      *
@@ -12,6 +12,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       $helper
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    $extend
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
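
The new `extend` template parameter above lets narrow-field variants of op_iget splice an extra extension instruction after the helper returns, since sub-word getters must sign- or zero-extend the 32-bit result before it reaches the vreg (the per-variant strings, e.g. something like an `sxtb w0, w0` for the byte getter, are not shown in this hunk). A small C++ sketch of what that extension accomplishes:

    #include <cstdint>
    #include <iostream>

    // A byte field arrives in a 32-bit register; the extend hook makes it a
    // properly signed value before it is stored.
    int32_t ExtendByte(uint32_t raw) { return static_cast<int8_t>(raw); }

    int main() {
      std::cout << ExtendByte(0xFFu) << '\n';  // prints -1, not 255
    }
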
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index f087d23..e8b4f04 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,20 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      $func                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -30,10 +16,18 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index d4856d2..e528d9f 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -6,26 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 0afd276..8f4741c 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,6 +20,8 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
+#include "jit/jit.h"
+#include "debugger.h"
 
 namespace art {
 namespace interpreter {
@@ -45,7 +47,9 @@
 void InitMterpTls(Thread* self) {
   self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
   self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
-  self->SetMterpCurrentIBase(artMterpAsmInstructionStart);
+  self->SetMterpCurrentIBase(TraceExecutionEnabled() ?
+                             artMterpAsmAltInstructionStart :
+                             artMterpAsmInstructionStart);
 }
 
 /*
@@ -139,6 +143,20 @@
   return entries[index];
 }
 
+extern "C" bool MterpShouldSwitchInterpreters()
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  bool unhandled_instrumentation;
+  // TODO: enable for other targets after more extensive testing.
+  if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm)) {
+    unhandled_instrumentation = instrumentation->NonJitProfilingActive();
+  } else {
+    unhandled_instrumentation = instrumentation->IsActive();
+  }
+  return unhandled_instrumentation || Dbg::IsDebuggerActive();
+}
+
 
 extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
                                    uint16_t* dex_pc_ptr,  uint16_t inst_data )
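
MterpShouldSwitchInterpreters() above is also re-checked from the assembly invoke stubs after each call returns, since the callee may have changed the environment. A sketch of that epilogue, with invented stand-ins for the invoke helper; only the control flow is taken from the patch:

    #include <iostream>

    // Stand-ins for the real hooks (illustration only).
    bool DoInvoke() { return true; }                 // ~ MterpInvokeVirtual & friends
    bool ShouldSwitchInterpreters() { return false; }

    // The invoke stubs now re-check the predicate after the call: if a debugger
    // attached or instrumentation was enabled, mterp bails rather than resuming.
    const char* InvokeEpilogue() {
      if (!DoInvoke()) return "MterpException";
      if (ShouldSwitchInterpreters()) return "MterpFallback";
      return "dispatch next opcode";
    }

    int main() { std::cout << InvokeEpilogue() << '\n'; }
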
@@ -429,6 +447,7 @@
   } else {
     self->AssertNoPendingException();
   }
+  TraceExecution(*shadow_frame, inst, shadow_frame->GetDexPC());
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
@@ -488,6 +507,14 @@
             << self->IsExceptionPending();
 }
 
+extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
+}
+
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
@@ -500,9 +527,10 @@
   }
 }
 
-extern "C" void MterpSuspendCheck(Thread* self)
+extern "C" bool MterpSuspendCheck(Thread* self)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
+  return MterpShouldSwitchInterpreters();
 }
 
 extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
@@ -618,5 +646,15 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  instrumentation->Branch(self, method, dex_pc, offset);
+  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+}
+
 }  // namespace interpreter
 }  // namespace art
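
MterpProfileBranch above is a single hook with two jobs: feed the branch to instrumentation for hotness accounting, then ask the JIT whether on-stack replacement happened, in which case the assembly jumps to MterpOnStackReplacement and unwinds. A sketch of that contract; both helpers here are invented stand-ins for the instrumentation and jit::Jit calls:

    #include <cstdint>
    #include <iostream>

    // Stand-ins for the runtime pieces (illustration only).
    void RecordBranch(uint32_t dex_pc, int32_t offset) {}             // ~ instrumentation->Branch
    bool MaybeDoOnStackReplacement(uint32_t dex_pc, int32_t offset) { // ~ jit::Jit hook
      return false;
    }

    // Always record the branch, then report whether execution continued in
    // compiled code (true => the caller must not resume mterp dispatch).
    bool ProfileBranch(uint32_t dex_pc, int32_t offset) {
      RecordBranch(dex_pc, offset);
      return MaybeDoOnStackReplacement(dex_pc, offset);
    }

    int main() {
      std::cout << (ProfileBranch(10, -6) ? "OSR" : "stay in mterp") << '\n';
    }
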
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 90d21e9..8d24641 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,6 +30,7 @@
 
 void InitMterpTls(Thread* self);
 void CheckMterpAsmConstants();
+extern "C" bool MterpShouldSwitchInterpreters();
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index ee19559..94cbd2d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -92,6 +92,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -116,14 +119,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1111,20 +1106,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
@@ -1143,17 +1146,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1176,21 +1187,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1211,7 +1230,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1219,9 +1238,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1232,8 +1260,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1255,7 +1284,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1263,9 +1292,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1276,8 +1314,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
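
Both switch handlers call a helper that walks the Dex switch payload and hands back a code-unit branch offset, which the rewrite now parks in rINST before profiling. A sketch of the packed-switch lookup, using the payload layout from the Dex format specification (the helper name is illustrative, not the real MterpDoPackedSwitch signature):

    #include <cstdint>

    // Packed-switch payload layout (Dex format): ident, size, first_key,
    // then `size` 32-bit relative branch targets.
    int32_t DoPackedSwitchSketch(const uint16_t* payload, int32_t test_val) {
      const uint16_t size = payload[1];
      const int32_t first_key =
          static_cast<int32_t>(payload[2]) |
          (static_cast<int32_t>(payload[3]) << 16);
      const int32_t* targets = reinterpret_cast<const int32_t*>(&payload[4]);
      const uint32_t index = static_cast<uint32_t>(test_val - first_key);
      if (index >= size) {
        return 3;  // no match: fall through past the 3-code-unit instruction
      }
      return targets[index];  // code-unit offset relative to the switch opcode
    }
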
@@ -1495,17 +1534,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bne .L_op_if_eq_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eq_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1514,10 +1565,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movne rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
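
Under MTERP_PROFILE_BRANCHES the if-cmp handlers now branch early to a local not-taken label, so only taken branches pay for the EXPORT_PC / MterpProfileBranch call. The shape of the if-eq handler in C++ terms (names illustrative; pc advances in 16-bit code units):

    #include <cstdint>

    // Not-taken path skips the 2-code-unit instruction immediately; the
    // taken path profiles first, then applies the branch offset.
    void HandleIfEqSketch(int32_t va, int32_t vb, int32_t branch_offset,
                          const uint16_t*& pc) {
      if (va != vb) {               // bne .L_op_if_eq_not_taken
        pc += 2;                    // FETCH_ADVANCE_INST 2
        return;
      }
      // EXPORT_PC; nonzero MterpProfileBranch result would exit to OSR here.
      pc += branch_offset;          // FETCH_ADVANCE_INST_RB, in code units
    }
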
@@ -1538,17 +1589,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    beq .L_op_if_ne_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ne_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1557,10 +1620,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    moveq rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1581,17 +1644,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bge .L_op_if_lt_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1600,10 +1675,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movge rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1624,17 +1699,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    blt .L_op_if_ge_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ge_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1643,10 +1730,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movlt rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1667,17 +1754,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    ble .L_op_if_gt_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1686,10 +1785,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movle rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1710,17 +1809,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bgt .L_op_if_le_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_le_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1729,10 +1840,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movgt rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1753,25 +1864,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bne .L_op_if_eqz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eqz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movne rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
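
The non-profiling (#else) paths keep the old branch-free idiom: a predicated move rewrites rINST to 2 code units when the test fails, and the doubled add both scales code units to bytes and sets the N flag, so backward branches fall into the suspend check for free. Equivalent logic as a hypothetical helper:

    #include <cstdint>

    // movne rINST, #2  +  adds r1, rINST, rINST  in one place: pick the
    // offset without branching, scale to bytes, and report "backward?" so
    // the caller knows to take the suspend-check path (bmi).
    bool AdvancePcSketch(bool taken, int32_t branch_offset, const uint8_t*& pc) {
      const int32_t code_units = taken ? branch_offset : 2;
      const int32_t byte_offset = code_units * 2;   // adds rX, rINST, rINST
      pc += byte_offset;                            // FETCH_ADVANCE_INST_RB
      return byte_offset < 0;                       // bmi -> suspend check
    }
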
@@ -1792,25 +1915,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    beq .L_op_if_nez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_nez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    moveq rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1831,25 +1966,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bge .L_op_if_ltz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ltz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movge rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1870,25 +2017,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    blt .L_op_if_gez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movlt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1909,25 +2068,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    ble .L_op_if_gtz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gtz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movle rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1948,25 +2119,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bgt .L_op_if_lez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3294,6 +3477,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
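Every invoke-return path in this run of hunks gains the same three instructions: after advancing past the 3-code-unit invoke, ask the runtime whether mterp is still the right interpreter and fall back if not (the callee may have attached a debugger or enabled instrumentation mid-call). A hedged sketch of that gate; the fields below are placeholders, not the real Runtime API:

    #include <cstdint>

    // Illustrative only -- the real MterpShouldSwitchInterpreters consults
    // the runtime's instrumentation/debugger state.
    struct RuntimeStateSketch {
      bool debugger_active;
      bool instrumentation_installed;
    };

    extern "C" int32_t ShouldSwitchInterpretersSketch(
        const RuntimeStateSketch* rt) {
      // nonzero -> bne MterpFallback (resume in the reference interpreter)
      return (rt->debugger_active || rt->instrumentation_installed) ? 1 : 0;
    }
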
 
@@ -3326,6 +3512,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3358,6 +3547,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3383,6 +3575,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3409,6 +3604,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3453,6 +3651,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3478,6 +3679,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3503,6 +3707,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3528,6 +3735,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3553,6 +3763,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -4948,16 +5161,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -4980,16 +5193,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -5012,16 +5225,16 @@
     add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
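
Aside from the new CLEAR_SHADOW_PAIR (which appears to zero the shadow copies of the wide destination pair), these long-shift hunks only recolumn the comments; the algorithm is unchanged. For reference, the five-instruction sequence composes a 64-bit shift from 32-bit halves, equivalent to this C++ (ARM register-specified shifts of 32 or more yield 0, which C must special-case):

    #include <cstdint>

    // shl-long: r0/r1 hold lo/hi of vBB, r2 the shift count.
    void ShlLongSketch(uint32_t& lo, uint32_t& hi, uint32_t n) {
      n &= 63;                      // and r2, r2, #63
      if (n >= 32) {                // subs ip, r2, #32 ; movpl r1, r0, asl ip
        hi = lo << (n - 32);
        lo = 0;                     // asl by >= 32 yields 0 on ARM
      } else if (n != 0) {
        hi = (hi << n) | (lo >> (32 - n));  // mov / rsb / orr
        lo <<= n;                   // mov r0, r0, asl r2
      }                             // n == 0: both halves unchanged
    }
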
@@ -6087,17 +6300,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r1, r1, asl r2              @  r1<- r1 << r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r1, r1, asl r2              @ r1<- r1 << r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
-    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6114,17 +6327,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
-    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6141,17 +6354,17 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
+    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
-
-    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
-    rsb     r3, r2, #32                 @  r3<- 32 - r2
-    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
-    subs    ip, r2, #32                 @  ip<- r2 - 32
+    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
+    rsb     r3, r2, #32                 @ r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @ ip<- r2 - 32
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
-    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -7284,6 +7497,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -7309,6 +7525,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -12098,7 +12317,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -12189,8 +12407,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
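
The exception-delivery epilogue changes in two ways: before resuming at the catch block it now asks whether the interpreter should be switched (falling back if so), and the dex_pc_ptr store is replaced by an explicit EXPORT_PC once the new PC is computed. In outline, under assumed names:

    #include <cstdint>

    enum class ResumeAction { kDispatchCatchHandler, kFallback };

    // must_switch models a nonzero MterpShouldSwitchInterpreters result.
    ResumeAction ResumeAtCatchSketch(bool must_switch,
                                     const uint16_t* new_dex_pc) {
      if (must_switch) {
        return ResumeAction::kFallback;   // bne MterpFallback
      }
      // EXPORT_PC publishes new_dex_pc to the shadow frame, then
      // FETCH_INST / GET_INST_OPCODE / GOTO_OPCODE dispatch the catch
      // handler's first instruction.
      (void)new_dex_pc;
      return ResumeAction::kDispatchCatchHandler;
    }
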
@@ -12202,12 +12424,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
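MterpCheckSuspendAndContinue is restructured so the common no-request case dispatches straight back into the handler table, and a nonzero return from MterpSuspendCheck (new in this change) bails to MterpFallback rather than continuing. The flag test in C++ terms, with illustrative bit values:

    #include <cstdint>

    constexpr uint32_t kSuspendReqSketch    = 1u << 0;  // THREAD_SUSPEND_REQUEST
    constexpr uint32_t kCheckpointReqSketch = 1u << 1;  // THREAD_CHECKPOINT_REQUEST

    enum class SuspendOutcome { kContinue, kFallback };

    // thread_flags were preloaded into lr before FETCH_ADVANCE_INST_RB;
    // suspend_check_failed models MterpSuspendCheck returning nonzero.
    SuspendOutcome CheckSuspendSketch(uint32_t thread_flags,
                                      bool suspend_check_failed) {
      if ((thread_flags & (kSuspendReqSketch | kCheckpointReqSketch)) == 0) {
        return SuspendOutcome::kContinue;   // fast path: GOTO_OPCODE
      }
      // slow path (label 1:): EXPORT_PC; MterpSuspendCheck(self)
      return suspend_check_failed ? SuspendOutcome::kFallback
                                  : SuspendOutcome::kContinue;
    }
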
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
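
Per the comment above, by the time control reaches MterpOnStackReplacement the compiled code has already run and returned, so the label only logs the event when MTERP_LOGGING is on and exits mterp with a normal-return status. As a sketch, with a stand-in for MterpLogOSR:

    #include <cstdint>

    static void LogOsrSketch(int32_t offset) { (void)offset; }  // ~MterpLogOSR

    int32_t OnStackReplacementExitSketch(bool logging_enabled, int32_t offset) {
      if (logging_enabled) {
        LogOsrSketch(offset);   // MterpLogOSR(self, shadow_frame, rINST)
      }
      return 1;                 // mov r0, #1 ; b MterpDone
    }
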
 
 /*
  * Bail out to reference interpreter.
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index e9d28ab..e4825f0 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -94,6 +94,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -121,14 +124,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1087,26 +1082,23 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
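
On arm64 the offset widening is done with sbfm x2, xINST, 0, 31, which is the raw encoding of sxtw: the signed 32-bit offset in wINST is sign-extended into the full x2 argument register before the call. Equivalent C++:

    #include <cstdint>

    // sbfm xD, xN, 0, 31 == sxtw xD, wN: widen the signed 32-bit branch
    // offset to fill the 64-bit x2 argument register.
    int64_t WidenOffsetSketch(int32_t winst) {
      return static_cast<int64_t>(winst);
    }
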
 
 /* ------------------------------ */
     .balign 128
@@ -1119,22 +1111,21 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1152,26 +1143,23 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1187,20 +1175,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1208,13 +1182,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1231,20 +1213,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1252,13 +1220,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 
 /* ------------------------------ */
@@ -1396,17 +1372,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.eq .L_op_if_eq_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eq_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1415,11 +1402,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1440,17 +1427,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movne w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ne .L_op_if_ne_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ne_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1459,11 +1457,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1484,17 +1482,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.lt .L_op_if_lt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1503,11 +1512,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1528,17 +1537,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movge w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ge .L_op_if_ge_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ge_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1547,11 +1567,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1572,17 +1592,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.gt .L_op_if_gt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1591,11 +1622,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1616,17 +1647,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movle w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.le .L_op_if_le_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_le_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1635,11 +1677,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1660,26 +1702,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    moveq w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.eq .L_op_if_eqz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eqz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1700,26 +1753,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movne w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ne .L_op_if_nez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_nez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1740,26 +1804,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movlt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.lt .L_op_if_ltz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ltz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1780,26 +1855,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movge w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ge .L_op_if_gez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1820,26 +1906,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movgt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.gt .L_op_if_gtz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gtz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1860,26 +1957,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movle w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.le .L_op_if_lez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2401,6 +2509,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGet32InstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2457,6 +2566,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetObjInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2488,6 +2598,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetBooleanInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2519,6 +2630,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetByteInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2550,6 +2662,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetCharInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2581,6 +2694,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetShortInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
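
The lone instruction added to each iget hunk above (uxtb, sxtb, uxth, sxth) widens the helper's return value: the AArch64 calling convention does not guarantee the upper bits of w0 when the callee returns a type narrower than 32 bits, so the handler must zero- or sign-extend explicitly before storing to the vreg. The same widening in C++ terms, with the value types inferred from the helper names:

    #include <cstdint>

    static inline uint32_t WidenBoolean(uint32_t raw) { return static_cast<uint8_t>(raw);  }  // uxtb w0, w0
    static inline int32_t  WidenByte(uint32_t raw)    { return static_cast<int8_t>(raw);   }  // sxtb w0, w0
    static inline uint32_t WidenChar(uint32_t raw)    { return static_cast<uint16_t>(raw); }  // uxth w0, w0
    static inline int32_t  WidenShort(uint32_t raw)   { return static_cast<int16_t>(raw);  }  // sxth w0, w0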
@@ -3158,11 +3272,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtual
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3190,11 +3305,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3222,11 +3338,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirect
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3247,11 +3364,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStatic
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3273,11 +3391,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterface
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3320,11 +3439,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3345,11 +3465,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuperRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3370,11 +3491,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirectRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3395,11 +3517,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStaticRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3420,11 +3543,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterfaceRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6852,11 +6976,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuick
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6877,11 +7002,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuickRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
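
Every invoke handler in this series gains the same two-instruction epilogue: call MterpShouldSwitchInterpreters and divert to MterpFallback on a nonzero return, so that a debugger or instrumentation attached while the callee ran is honored before the next dispatch. A C++ rendering of that control flow; only the two Mterp* names appear in the diff, and the surrounding helpers are stand-ins:

    #include <cstdint>

    struct Thread;
    struct ShadowFrame;
    extern "C" bool MterpInvokeVirtual(Thread*, ShadowFrame*, const uint16_t*, uint32_t);
    extern "C" bool MterpShouldSwitchInterpreters();

    void AdvancePc(int code_units);   // stand-in for FETCH_ADVANCE_INST
    void DispatchNextOpcode();        // stand-in for GET_INST_OPCODE / GOTO_OPCODE

    enum class Exit { kException, kFallback, kContinue };

    Exit InvokeEpilogue(Thread* self, ShadowFrame* sf, const uint16_t* pc, uint32_t inst) {
      if (!MterpInvokeVirtual(self, sf, pc, inst)) {
        return Exit::kException;            // cbz  w0, MterpException
      }
      AdvancePc(3);                         // FETCH_ADVANCE_INST 3
      if (MterpShouldSwitchInterpreters()) {
        return Exit::kFallback;             // cbnz w0, MterpFallback
      }
      DispatchNextOpcode();
      return Exit::kContinue;
    }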
 
@@ -11565,7 +11691,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -11654,8 +11779,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -11675,10 +11803,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    w0, MterpFallback           // Something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpLogOSR
+#endif
+    mov     x0, #1                      // Signal normal return
+    b       MterpDone
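
The MterpOnStackReplacement stub above logs the triggering offset when MTERP_LOGGING is set, then signals a normal return (mov x0, #1; b MterpDone): the frame that the compiled code finished is reported to the C++ caller like an ordinary interpreted return. A plausible caller-side reading, where ExecuteMterpImpl's exact signature is an assumption and only the return convention comes from the stub:

    #include <cstdint>

    // Hedged sketch of the caller-side contract.
    struct Thread;
    struct ShadowFrame;
    union JValue;

    extern "C" bool ExecuteMterpImpl(Thread* self, const uint16_t* code_item,
                                     ShadowFrame* frame, JValue* result);

    bool RunFrame(Thread* self, const uint16_t* insns,
                  ShadowFrame* frame, JValue* result) {
      // After an OSR transfer the compiled code finishes the frame, so a
      // true return is treated exactly like a normal interpreted return.
      return ExecuteMterpImpl(self, insns, frame, result);
    }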
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 96229ce..d365a4f 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -189,11 +189,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -201,9 +196,22 @@
 .endm
 
 /*
+ * Refresh handler table.
+ * IBase handling uses a caller-save register, so we must restore it after each call.
+ * It is also clobbered as the output of some 64-bit operations (like imul), so we
+ * should restore it in those cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
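
Splitting RESTORE_IBASE out of REFRESH_IBASE keeps the same load sequence but names the intent at each call site: rIBASE lives in a caller-save register, so any call (or %edx-writing operation) invalidates it and the handler must reload it from the Thread. The invariant in C++ form, with the field name assumed for illustration:

    // What RESTORE_IBASE re-establishes after a clobbering call.
    struct Thread {
      void** current_ibase;         // stands in for THREAD_CURRENT_IBASE_OFFSET
    };

    static inline void** ReloadIBase(Thread* self) {
      return self->current_ibase;   // movl rSELF, rIBASE; movl OFFSET(rIBASE), rIBASE
    }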
+
+/*
  * If rSELF is already loaded then we can use it from known reg.
  */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
@@ -771,8 +779,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -790,8 +798,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -809,8 +817,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -828,8 +836,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
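
The repeated testl-to-testb conversions in this file rest on one calling-convention detail: these helpers return a C++ bool, and on IA-32 only the low byte of the return register (%al) is defined for that type, so testing all of %eax may observe stale upper bits and mis-branch. What testb %al, %al checks, expressed in C++:

    #include <cstdint>

    // Equivalent of 'testb %al, %al' followed by jnz/jz: only the low
    // byte of the returned register carries the bool result.
    static inline bool ReturnedTrue(uint32_t raw_eax) {
      return static_cast<uint8_t>(raw_eax) != 0;
    }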
 
@@ -851,8 +859,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -874,8 +882,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -903,7 +911,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
@@ -943,8 +951,8 @@
     REFRESH_INST 34
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -969,8 +977,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -994,7 +1002,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArray)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1019,7 +1027,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArrayRange)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1037,7 +1045,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1923,7 +1931,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
@@ -2090,8 +2098,8 @@
     REFRESH_INST 77
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -2221,7 +2229,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGet32InstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2259,7 +2267,7 @@
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2285,7 +2293,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetObjInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2320,7 +2328,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetBooleanInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2355,7 +2363,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetByteInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2390,7 +2398,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetCharInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2425,7 +2433,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetShortInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2461,9 +2469,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet32InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2485,9 +2493,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2503,9 +2511,9 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2533,9 +2541,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2564,9 +2572,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2595,9 +2603,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2626,9 +2634,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2652,7 +2660,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGet32StaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2685,7 +2693,7 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2709,7 +2717,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetObjStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 1
@@ -2741,7 +2749,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetBooleanStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2773,7 +2781,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetByteStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2805,7 +2813,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetCharStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2837,7 +2845,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetShortStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2869,9 +2877,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet32StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2894,9 +2902,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2912,9 +2920,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2939,9 +2947,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2967,9 +2975,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2995,9 +3003,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3023,9 +3031,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3049,9 +3057,9 @@
     REFRESH_INST 110
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtual)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3082,9 +3090,9 @@
     REFRESH_INST 111
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3115,9 +3123,9 @@
     REFRESH_INST 112
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirect)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3141,9 +3149,9 @@
     REFRESH_INST 113
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStatic)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3168,9 +3176,9 @@
     REFRESH_INST 114
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterface)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3215,9 +3223,9 @@
     REFRESH_INST 116
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3241,9 +3249,9 @@
     REFRESH_INST 117
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuperRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3267,9 +3275,9 @@
     REFRESH_INST 118
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirectRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3293,9 +3301,9 @@
     REFRESH_INST 119
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStaticRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3319,9 +3327,9 @@
     REFRESH_INST 120
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterfaceRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -4047,10 +4055,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_div_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_div_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4101,10 +4109,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_rem_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_rem_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4785,9 +4793,9 @@
     sarl    $4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -5514,11 +5522,11 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5721,11 +5729,11 @@
 /* File: x86/op_mul_int_lit8.S */
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5985,7 +5993,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf,rINSTbl                   # rINST <- A
@@ -6037,9 +6045,9 @@
     REFRESH_INST 232
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -6062,9 +6070,9 @@
     REFRESH_INST 233
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -6088,9 +6096,9 @@
     REFRESH_INST 234
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuickRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -12912,7 +12920,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
index bb5b319..e87ba45 100644
--- a/runtime/interpreter/mterp/x86/bindiv.S
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -13,10 +13,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $$0xFFFFFF00, %edx              # If both arguments are less
+    testl   $$0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .L${opcode}_8                   # Do 8-bit divide
-    test    $$0xFFFF0000, %edx              # If both arguments are less
+    testl   $$0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .L${opcode}_16                  # Do 16-bit divide
     cmpl    $$-1, %ecx
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index 385e784..a1532fa 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -114,7 +114,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 0977b90..3fbbbf9 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -182,11 +182,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -194,9 +189,22 @@
 .endm
 
 /*
+ * Refresh handler table.
+ * IBase handles use the caller-save register, so we must restore it after each call.
+ * It is also written by some 64-bit operations (like imul), so it must be
+ * restored in those cases as well.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
  * If rSELF is already loaded then we can use it from known reg.
  */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
index 054fbfd..bbd88cf 100644
--- a/runtime/interpreter/mterp/x86/invoke.S
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -14,7 +14,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
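Across these mterp hunks, testl %eax, %eax becomes testb %al, %al because the C++ Mterp helpers return bool: under the i386 psABI only %al holds a defined result for a bool return, and the upper bytes of %eax are unspecified. A minimal standalone sketch of that contract (illustrative, not ART code):

    #include <cstdio>

    // Illustrative only: a bool-returning helper like the MterpInvoke* C++
    // helpers above. Under the i386 psABI the result is defined only in %al;
    // the upper bytes of %eax are unspecified, which is why the hand-written
    // caller now uses "testb %al, %al" instead of "testl %eax, %eax".
    extern "C" bool HelperReturningBool() { return true; }

    int main() {
      // A compiler-generated caller inspects only the low byte of the result.
      if (HelperReturningBool()) {
        std::puts("branch taken after testing %al only");
      }
      return 0;
    }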
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
index cbfb50c..35ec053 100644
--- a/runtime/interpreter/mterp/x86/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -13,7 +13,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
index 9cfc221..980b26a 100644
--- a/runtime/interpreter/mterp/x86/op_aput_object.S
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -9,7 +9,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
index ae2ff9e..d090aa3 100644
--- a/runtime/interpreter/mterp/x86/op_check_cast.S
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -12,7 +12,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
index 343e110..60be789 100644
--- a/runtime/interpreter/mterp/x86/op_const_class.S
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
index bbac69c..ff93b23 100644
--- a/runtime/interpreter/mterp/x86/op_const_string.S
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
index 4236807..e7f952a 100644
--- a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
index 004aed9..5855284 100644
--- a/runtime/interpreter/mterp/x86/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -7,6 +7,6 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
index a2bac29..35b2fe8 100644
--- a/runtime/interpreter/mterp/x86/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -15,6 +15,6 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL($helper)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
index 9932610..e3304ba 100644
--- a/runtime/interpreter/mterp/x86/op_iget.S
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -17,7 +17,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
index fe16694..b1551a0 100644
--- a/runtime/interpreter/mterp/x86/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -9,7 +9,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf,rINSTbl                   # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
index 92126b4..a5d7e69 100644
--- a/runtime/interpreter/mterp/x86/op_iget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -21,5 +21,5 @@
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
index fd5bf44..e6fe5b2 100644
--- a/runtime/interpreter/mterp/x86/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -18,7 +18,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
index 13cfe5c..c847e2d 100644
--- a/runtime/interpreter/mterp/x86/op_iput.S
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -19,7 +19,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL($handler)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
index f63075c..e013697 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
index d54b1b7..cb77929 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -5,7 +5,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
index 573e14d..122eecf 100644
--- a/runtime/interpreter/mterp/x86/op_iput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -13,7 +13,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
index 9e885bd..b35c684 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_enter.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
index 0904800..2d17d5e 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_exit.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -12,7 +12,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
index f92a28e..da699ae 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -3,8 +3,8 @@
     sarl    $$4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $$0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
index 31ab613..056f491 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -3,10 +3,10 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $$4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $$0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
index 6637aa7..59b3844 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -1,9 +1,9 @@
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
index 2490477..16226e9 100644
--- a/runtime/interpreter/mterp/x86/op_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -15,7 +15,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
index 712a5eb..f976acc 100644
--- a/runtime/interpreter/mterp/x86/op_new_instance.S
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -10,7 +10,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
index ec96458..0e9a3d8 100644
--- a/runtime/interpreter/mterp/x86/op_sget.S
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -15,7 +15,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
index 833f266..2b60303 100644
--- a/runtime/interpreter/mterp/x86/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -17,5 +17,5 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
index a199281..0b5de09 100644
--- a/runtime/interpreter/mterp/x86/op_sput.S
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -16,7 +16,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
index e3e57fc..0db5177 100644
--- a/runtime/interpreter/mterp/x86/op_sput_object.S
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
index 7544838..19cff0d 100644
--- a/runtime/interpreter/mterp/x86/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -14,7 +14,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 60ad0cb..0e175b8 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -261,6 +261,17 @@
   }
 }
 
+void UnstartedRuntime::UnstartedClassGetEnclosingClass(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsClass()));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    result->SetL(nullptr);
+    return;
+  }
+  result->SetL(klass->GetDexFile().GetEnclosingClass(klass));
+}
+
 void UnstartedRuntime::UnstartedVmClassLoaderFindLoadedClass(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 047e9066..6d4d711 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -24,6 +24,7 @@
   V(ClassClassForName, "java.lang.Class java.lang.Class.classForName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassNewInstance, "java.lang.Object java.lang.Class.newInstance()") \
   V(ClassGetDeclaredField, "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") \
+  V(ClassGetEnclosingClass, "java.lang.Class java.lang.Class.getEnclosingClass()") \
   V(VmClassLoaderFindLoadedClass, "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") \
   V(VoidLookupType, "java.lang.Class java.lang.Void.lookupType()") \
   V(SystemArraycopy, "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") \
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 80c174c..bdc7ee2 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -113,8 +113,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>(
-      dlsym(jit_library_handle_, "jit_load"));
+  jit_load_ = reinterpret_cast<void* (*)(bool*)>(dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't find jit_load entry point";
@@ -141,23 +140,15 @@
     *error_msg = "JIT couldn't find jit_types_loaded entry point";
     return false;
   }
-  CompilerCallbacks* callbacks = nullptr;
   bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols);
+  jit_compiler_handle_ = (jit_load_)(&will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
     return false;
   }
-  if (callbacks == nullptr) {
-    dlclose(jit_library_handle_);
-    *error_msg = "JIT compiler callbacks were not set";
-    jit_compiler_handle_ = nullptr;
-    return false;
-  }
-  compiler_callbacks_ = callbacks;
   generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
@@ -290,7 +281,15 @@
   }
 
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
-    VLOG(jit) << "OSR not supported on this platform";
+    VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
+    return false;
+  }
+
+  if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
+    // Don't attempt an OSR if we are close to the stack limit. Since the
+    // interpreter frames are still on the stack, OSR could overflow the
+    // stack even for a simple loop.
+    // b/27094810.
     return false;
   }
 
@@ -304,8 +303,9 @@
     return false;
   }
 
-  // Fetch some data before looking up for an OSR method, as we don't want thread
-  // suspension once we hold an OSR method.
+  // Fetch some data before looking up an OSR method. We don't want thread
+  // suspension once we hold an OSR method, as the JIT code cache could delete
+  // the OSR method while we are suspended.
   const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
   const char* shorty = method->GetShorty();
   std::string method_name(VLOG_IS_ON(jit) ? PrettyMethod(method) : "");
@@ -360,7 +360,7 @@
         DexRegisterLocation::Kind location =
             vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
         if (location == DexRegisterLocation::Kind::kNone) {
-          // Dex register is dead or unitialized.
+          // Dex register is dead or uninitialized.
           continue;
         }
 
@@ -369,7 +369,8 @@
           continue;
         }
 
-        DCHECK(location == DexRegisterLocation::Kind::kInStack);
+        DCHECK(location == DexRegisterLocation::Kind::kInStack)
+            << DexRegisterLocation::PrettyDescriptor(location);
 
         int32_t vreg_value = shadow_frame->GetVReg(vreg);
         int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
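The new early-out above guards OSR with a stack-headroom test before any frame is built. A minimal sketch of the idea, with Thread and GetStackEnd() as stand-ins for the ART equivalents (stacks grow downward on the targets in question, and __builtin_frame_address is a GCC/Clang builtin):

    #include <cstdint>

    // Illustrative stand-in for art::Thread's stack watermark.
    struct Thread {
      void* stack_end;  // lowest address the thread may still safely use
      void* GetStackEnd() const { return stack_end; }
    };

    // Mirror of the guard above: if the current frame is already at or below
    // the watermark, starting a stack-hungry operation (like OSR, with the
    // interpreter frames still live) risks an overflow, so bail out.
    bool HasStackHeadroom(const Thread* thread) {
      return __builtin_frame_address(0) >= thread->GetStackEnd();
    }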
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 042da92..109ca3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -32,7 +32,6 @@
 namespace art {
 
 class ArtMethod;
-class CompilerCallbacks;
 struct RuntimeArgumentMap;
 
 namespace jit {
@@ -55,9 +54,6 @@
                                   size_t warmup_threshold,
                                   size_t osr_threshold);
   void CreateThreadPool();
-  CompilerCallbacks* GetCompilerCallbacks() {
-    return compiler_callbacks_;
-  }
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
@@ -108,7 +104,7 @@
   // JIT compiler
   void* jit_library_handle_;
   void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**, bool*);
+  void* (*jit_load_)(bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
   void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
@@ -119,7 +115,6 @@
 
   std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
-  CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
   bool save_profiling_info_;
   bool generate_debug_info_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 9111ddf..d5a9d66 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -232,25 +232,20 @@
 void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  const uint8_t* data = method_header->GetNativeGcMap();
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
-  data = method_header->GetMappingTable();
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
+
+  FreeData(const_cast<uint8_t*>(method_header->GetNativeGcMap()));
+  FreeData(const_cast<uint8_t*>(method_header->GetMappingTable()));
   // Use the offset directly to prevent sanity check that the method is
   // compiled with optimizing.
   // TODO(ngeoffray): Clean up.
   if (method_header->vmap_table_offset_ != 0) {
-    data = method_header->code_ - method_header->vmap_table_offset_;
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
+    FreeData(const_cast<uint8_t*>(data));
   }
-  mspace_free(code_mspace_, reinterpret_cast<uint8_t*>(allocation));
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
@@ -269,11 +264,19 @@
       }
     }
   }
+  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+    if (alloc.ContainsUnsafe(it->first)) {
+      // Note that the code has already been removed in the loop above.
+      it = osr_code_map_.erase(it);
+    } else {
+      ++it;
+    }
+  }
   for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
     ProfilingInfo* info = *it;
     if (alloc.ContainsUnsafe(info->GetMethod())) {
       info->GetMethod()->SetProfilingInfo(nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      FreeData(reinterpret_cast<uint8_t*>(info));
       it = profiling_infos_.erase(it);
     } else {
       ++it;
@@ -299,19 +302,18 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
+  uint8_t* memory = nullptr;
   {
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
     {
       ScopedCodeCacheWrite scc(code_map_.get());
-      uint8_t* result = reinterpret_cast<uint8_t*>(
-          mspace_memalign(code_mspace_, alignment, total_size));
-      if (result == nullptr) {
+      memory = AllocateCode(total_size);
+      if (memory == nullptr) {
         return nullptr;
       }
-      code_ptr = result + header_size;
-      DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+      code_ptr = memory + header_size;
 
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
@@ -368,9 +370,7 @@
 }
 
 size_t JitCodeCache::CodeCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_code_;
 }
 
 size_t JitCodeCache::DataCacheSize() {
@@ -379,9 +379,7 @@
 }
 
 size_t JitCodeCache::DataCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_data_;
 }
 
 size_t JitCodeCache::NumberOfCompiledCode() {
@@ -391,7 +389,7 @@
 
 void JitCodeCache::ClearData(Thread* self, void* data) {
   MutexLock mu(self, lock_);
-  mspace_free(data_mspace_, data);
+  FreeData(reinterpret_cast<uint8_t*>(data));
 }
 
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
@@ -402,7 +400,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   if (result == nullptr) {
@@ -411,7 +409,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   return result;
@@ -544,7 +542,7 @@
   // we hold the lock.
   {
     MutexLock mu(self, lock_);
-    if (!garbage_collect_code_) {
+    if (!garbage_collect_code_ || current_capacity_ < kReservedCapacity) {
       IncreaseCodeCacheCapacity();
       NotifyCollectionDone(self);
       return;
@@ -620,12 +618,11 @@
       }
     }
 
-    void* data_mspace = data_mspace_;
     // Free all profiling infos of methods that were not being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
-      [data_mspace] (ProfilingInfo* info) {
+      [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
         if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
-          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          FreeData(reinterpret_cast<uint8_t*>(info));
           return true;
         }
         return false;
@@ -710,7 +707,7 @@
     return info;
   }
 
-  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  uint8_t* data = AllocateData(profile_info_size);
   if (data == nullptr) {
     return nullptr;
   }
@@ -782,5 +779,51 @@
   return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
 }
 
+void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
+                                             const OatQuickMethodHeader* header) {
+  if (method->GetEntryPointFromQuickCompiledCode() == header->GetEntryPoint()) {
+    // The entrypoint is the one to invalidate, so we just update
+    // it to the interpreter entry point and clear the counter to get the method
+    // Jitted again.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, GetQuickToInterpreterBridge());
+    method->ClearCounter();
+  } else {
+    MutexLock mu(Thread::Current(), lock_);
+    auto it = osr_code_map_.find(method);
+    if (it != osr_code_map_.end() && OatQuickMethodHeader::FromCodePointer(it->second) == header) {
+      // Remove the OSR method, to avoid using it again.
+      osr_code_map_.erase(it);
+    }
+  }
+}
+
+uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  uint8_t* result = reinterpret_cast<uint8_t*>(
+      mspace_memalign(code_mspace_, alignment, code_size));
+  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+  // Ensure the code after the header lands at the expected instruction alignment.
+  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
+  used_memory_for_code_ += mspace_usable_size(result);
+  return result;
+}
+
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
+}
+
+uint8_t* JitCodeCache::AllocateData(size_t data_size) {
+  void* result = mspace_malloc(data_mspace_, data_size);
+  used_memory_for_data_ += mspace_usable_size(result);
+  return reinterpret_cast<uint8_t*>(result);
+}
+
+void JitCodeCache::FreeData(uint8_t* data) {
+  used_memory_for_data_ -= mspace_usable_size(data);
+  mspace_free(data_mspace_, data);
+}
+
 }  // namespace jit
 }  // namespace art
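The CodeCacheSizeLocked/DataCacheSizeLocked rewrite replaces a full mspace_inspect_all walk with O(1) counters maintained at allocation and free time, charging the allocator's usable size so slack is accounted for. A minimal sketch of the same bookkeeping pattern, using glibc's malloc_usable_size in place of dlmalloc's mspace_usable_size (illustrative, not ART code):

    #include <cstddef>
    #include <cstdlib>
    #include <malloc.h>  // malloc_usable_size (glibc)

    // Bytes currently handed out, maintained at alloc/free time so size
    // queries are O(1) instead of walking the heap.
    static size_t used_bytes = 0;

    void* TrackedAlloc(size_t size) {
      void* result = std::malloc(size);
      // Account for what the allocator actually reserved, not what was asked
      // for; malloc_usable_size(nullptr) is 0, so a failed alloc is a no-op.
      used_bytes += malloc_usable_size(result);
      return result;
    }

    void TrackedFree(void* ptr) {
      used_bytes -= malloc_usable_size(ptr);
      std::free(ptr);
    }

    size_t UsedBytes() { return used_bytes; }  // analogous to used_memory_for_code_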
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 048f8d0..74ce7b5 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -49,7 +49,10 @@
   static constexpr size_t kMaxCapacity = 64 * MB;
   // Put the default to a very low amount for debug builds to stress the code cache
   // collection.
-  static constexpr size_t kInitialCapacity = kIsDebugBuild ? 16 * KB : 64 * KB;
+  static constexpr size_t kInitialCapacity = kIsDebugBuild ? 8 * KB : 64 * KB;
+
+  // By default (with the release kInitialCapacity of 64KB), do not GC until reaching 256KB.
+  static constexpr size_t kReservedCapacity = kInitialCapacity * 4;
 
   // Create the code cache with a code + data capacity equal to "capacity", error message is passed
   // in the out arg error_msg.
@@ -172,6 +175,10 @@
 
   size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
 
+  void InvalidateCompiledCodeFor(ArtMethod* method, const OatQuickMethodHeader* code)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -272,6 +279,17 @@
   // Whether we can do garbage collection.
   const bool garbage_collect_code_;
 
+  // The size in bytes of used memory for the data portion of the code cache.
+  size_t used_memory_for_data_ GUARDED_BY(lock_);
+
+  // The size in bytes of used memory for the code portion of the code cache.
+  size_t used_memory_for_code_ GUARDED_BY(lock_);
+
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
+  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
+  void FreeData(uint8_t* data) REQUIRES(lock_);
+  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
+
   // Number of compilations done throughout the lifetime of the JIT.
   size_t number_of_compilations_ GUARDED_BY(lock_);
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 422832e..3f806d3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -532,8 +532,9 @@
   return GetFieldPtr<LengthPrefixedArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
-  Class* super_class = GetSuperClass();
+  Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
       ? MemberOffset(RoundUp(super_class->GetObjectSize(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index cdc6204..9190e44 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1048,5 +1048,11 @@
   return depth;
 }
 
+uint32_t Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
+  std::string temp;
+  const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
+  return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 388a231..6e3463c 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1006,6 +1006,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference instance field. Other reference instance fields follow.
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceInstanceFieldOffset()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1119,6 +1121,9 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
   }
 
+  uint32_t FindTypeIndexInOtherDexFile(const DexFile& dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Class* GetJavaLangClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(HasJavaLangClass());
     return java_lang_Class_.Read();
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index eb391be4..76a36ac 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -1068,7 +1068,7 @@
       MemberOffset field_offset = kIsStatic
           ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
-          : klass->GetFirstReferenceInstanceFieldOffset();
+          : klass->GetFirstReferenceInstanceFieldOffset<kVerifyFlags, kReadBarrierOption>();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
         // TODO: Do a simpler check?
         if (field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index bcc2d33..910163c 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,6 +40,12 @@
 class OatHeader;
 class OatDexFile;
 
+namespace gc {
+namespace collector {
+class DummyOatFile;
+}  // namespace collector
+}  // namespace gc
+
 class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
@@ -312,6 +318,7 @@
   // elements. std::list<> and std::deque<> satisfy this requirement, std::vector<> doesn't.
   mutable std::list<std::string> string_cache_ GUARDED_BY(secondary_lookup_lock_);
 
+  friend class gc::collector::DummyOatFile;  // For modifying begin_ and end_.
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index f9d916a..d64aa43 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -394,6 +394,7 @@
 // Intended for local changes only.
 static void MaybeOverrideVerbosity() {
   //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
+  //  gLogVerbosity.collector = true;  // TODO: don't check this in!
   //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.deopt = true;  // TODO: don't check this in!
   //  gLogVerbosity.gc = true;  // TODO: don't check this in!
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 786cf06..2dfa860 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -23,6 +23,8 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
@@ -288,13 +290,18 @@
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
-        single_frame_deopt_method_(nullptr) {
+        single_frame_deopt_method_(nullptr),
+        single_frame_deopt_quick_method_header_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
     return single_frame_deopt_method_;
   }
 
+  const OatQuickMethodHeader* GetSingleFrameDeoptQuickMethodHeader() const {
+    return single_frame_deopt_quick_method_header_;
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
@@ -366,6 +373,7 @@
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
+        single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
       return true;
     }
@@ -601,6 +609,7 @@
   const bool single_frame_deopt_;
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
+  const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -629,13 +638,17 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
-  // Compiled code made an explicit deoptimization. Transfer the code
-  // to interpreter and clear the counter to JIT the method again.
+  // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  deopt_method->ClearCounter();
-  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-      deopt_method, GetQuickToInterpreterBridge());
+  if (Runtime::Current()->UseJit()) {
+    Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+        deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
+  } else {
+    // Transfer the code to interpreter.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        deopt_method, GetQuickToInterpreterBridge());
+  }
 
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2aeb792..eb5455a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1300,6 +1300,10 @@
   VLOG(startup) << "Runtime::InitNativeMethods exiting";
 }
 
+void Runtime::ReclaimArenaPoolMemory() {
+  arena_pool_->LockReclaimMemory();
+}
+
 void Runtime::InitThreadGroups(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
   ScopedJniEnvLocalRefState env_state(env);
@@ -1887,7 +1891,6 @@
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
-    compiler_callbacks_ = jit_->GetCompilerCallbacks();
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
                                      jit_options_->GetWarmupThreshold(),
                                      jit_options_->GetOsrThreshold());
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1956bae..8aac4ce 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -94,8 +94,6 @@
 class Transaction;
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
-typedef SafeMap<MethodReference, SafeMap<uint32_t, std::set<uint32_t>>,
-    MethodReferenceComparator> MethodRefToStringInitRegMap;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
@@ -566,6 +564,9 @@
   const ArenaPool* GetArenaPool() const {
     return arena_pool_.get();
   }
+
+  void ReclaimArenaPoolMemory();
+
   LinearAlloc* GetLinearAlloc() {
     return linear_alloc_.get();
   }
@@ -574,10 +575,6 @@
     return jit_options_.get();
   }
 
-  MethodRefToStringInitRegMap& GetStringInitMap() {
-    return method_ref_string_init_reg_map_;
-  }
-
   bool IsDebuggable() const;
 
   // Returns the build fingerprint, if set. Otherwise an empty string is returned.
@@ -803,8 +800,6 @@
   // Experimental opcodes should not be used by other production code.
   ExperimentalFlags experimental_flags_;
 
-  MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
-
   // Contains the build fingerprint, if given as a parameter.
   std::string fingerprint_;
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 2726e91..97c47e1 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -852,6 +852,22 @@
     tls32_.weak_ref_access_enabled = enabled;
   }
 
+  uint32_t GetDisableThreadFlipCount() const {
+    CHECK(kUseReadBarrier);
+    return tls32_.disable_thread_flip_count;
+  }
+
+  void IncrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    ++tls32_.disable_thread_flip_count;
+  }
+
+  void DecrementDisableThreadFlipCount() {
+    CHECK(kUseReadBarrier);
+    DCHECK_GT(tls32_.disable_thread_flip_count, 0U);
+    --tls32_.disable_thread_flip_count;
+  }
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -1214,7 +1230,8 @@
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
       suspended_at_suspend_check(false), ready_for_debug_invoke(false),
-      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true) {
+      debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
+      disable_thread_flip_count(0) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1281,6 +1298,11 @@
     // pause, this is not an issue.) Other collectors use Runtime::DisallowNewSystemWeaks() and
     // ReferenceProcessor::EnableSlowPath().
     bool32_t weak_ref_access_enabled;
+
+    // A thread-local version of Heap::disable_thread_flip_count_. This keeps track of how many
+    // levels of (nested) JNI critical sections the thread is in, and is used to detect entry
+    // into a nested JNI critical section.
+    uint32_t disable_thread_flip_count;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
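disable_thread_flip_count is a per-thread nesting counter; the natural consumer is an RAII guard that increments on entering a JNI critical section and decrements on leaving, so re-entrant sections balance automatically even on early returns. A self-contained sketch of that pattern (ScopedThreadFlipDisable and ThreadCounters are illustrative names, not ART classes):

    #include <cassert>
    #include <cstdint>

    // Stand-in for the per-thread counter added above.
    struct ThreadCounters {
      uint32_t disable_thread_flip_count = 0;
    };

    // RAII guard: increments on entry, decrements on exit, so nested JNI
    // critical sections nest and unwind correctly even on early returns.
    class ScopedThreadFlipDisable {
     public:
      explicit ScopedThreadFlipDisable(ThreadCounters* t) : t_(t) {
        ++t_->disable_thread_flip_count;
      }
      ~ScopedThreadFlipDisable() {
        assert(t_->disable_thread_flip_count > 0);
        --t_->disable_thread_flip_count;
      }
     private:
      ThreadCounters* const t_;
    };

    int main() {
      ThreadCounters t;
      {
        ScopedThreadFlipDisable outer(&t);
        ScopedThreadFlipDisable inner(&t);  // count > 1 marks a nested enter
        assert(t.disable_thread_flip_count == 2);
      }
      assert(t.disable_thread_flip_count == 0);
      return 0;
    }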
diff --git a/runtime/utils.h b/runtime/utils.h
index 36f9abf..83ac0b87 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -111,6 +111,11 @@
       : std::abs(value);
 }
 
+template <typename T>
+inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
+  return static_cast<typename std::make_unsigned<T>::type>(x);
+}
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
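MakeUnsigned is a thin convenience over static_cast to std::make_unsigned<T>::type; a quick usage sketch showing where it helps, namely when a signed value needs well-defined modular arithmetic or bit-level treatment:

    #include <cstdint>
    #include <iostream>
    #include <type_traits>

    template <typename T>
    inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
      return static_cast<typename std::make_unsigned<T>::type>(x);
    }

    int main() {
      int32_t v = -1;
      // Unsigned conversion is fully defined (modulo 2^32), unlike shifting
      // or overflowing the signed value: prints 4294967295.
      std::cout << MakeUnsigned(v) << "\n";
      // The deduced type tracks the argument: int64_t -> uint64_t.
      static_assert(std::is_same<decltype(MakeUnsigned(int64_t{0})), uint64_t>::value, "");
      return 0;
    }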
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1d31408..0c6060e 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -617,23 +617,6 @@
   return GetQuickInvokedMethod(inst, register_line, is_range, false);
 }
 
-SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(ArtMethod* m) {
-  Thread* self = Thread::Current();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
-                          true, true, false, true);
-  // Avoid copying: The map is moved out of the verifier before the verifier is destroyed.
-  return std::move(verifier.FindStringInitMap());
-}
-
-SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
-  Verify();
-  return GetStringInitPcRegMap();
-}
-
 bool MethodVerifier::Verify() {
   // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
   // the name.
@@ -1960,8 +1943,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  ArenaUniquePtr<RegisterLine> branch_line;
-  ArenaUniquePtr<RegisterLine> fallthrough_line;
+  RegisterLineArenaUniquePtr branch_line;
+  RegisterLineArenaUniquePtr fallthrough_line;
 
   switch (inst->Opcode()) {
     case Instruction::NOP:
@@ -2865,8 +2848,7 @@
          * Replace the uninitialized reference with an initialized one. We need to do this for all
          * registers that have the same object instance in them, not just the "this" register.
          */
-        const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-        work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
+        work_line_->MarkRefsAsInitialized(this, this_type);
       }
       if (return_type == nullptr) {
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor, false);
@@ -4824,7 +4806,7 @@
       AdjustReturnLine(this, ret_inst, target_line);
     }
   } else {
-    ArenaUniquePtr<RegisterLine> copy;
+    RegisterLineArenaUniquePtr copy;
     if (kDebugVerify) {
       copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index c7d1e6b..6d8e1ab 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -30,6 +30,7 @@
 #include "handle.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
+#include "register_line.h"
 #include "reg_type_cache.h"
 
 namespace art {
@@ -45,6 +46,7 @@
 class DexPcToReferenceMap;
 class MethodVerifier;
 class RegisterLine;
+using RegisterLineArenaUniquePtr = std::unique_ptr<RegisterLine, RegisterLineArenaDelete>;
 class RegType;
 
 /*
@@ -127,7 +129,7 @@
   }
 
  private:
-  ScopedArenaVector<ArenaUniquePtr<RegisterLine>> register_lines_;
+  ScopedArenaVector<RegisterLineArenaUniquePtr> register_lines_;
 
   DISALLOW_COPY_AND_ASSIGN(PcToRegisterLineTable);
 };
@@ -211,9 +213,6 @@
   static ArtMethod* FindInvokedMethodAtDexPc(ArtMethod* m, uint32_t dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static SafeMap<uint32_t, std::set<uint32_t>> FindStringInitMap(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   static void Init() SHARED_REQUIRES(Locks::mutator_lock_);
   static void Shutdown();
 
@@ -292,10 +291,6 @@
   ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
-    return string_init_pc_reg_map_;
-  }
-
   uint32_t GetEncounteredFailureTypes() {
     return encountered_failure_types_;
   }
@@ -771,14 +766,14 @@
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  ArenaUniquePtr<RegisterLine> work_line_;
+  RegisterLineArenaUniquePtr work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  ArenaUniquePtr<RegisterLine> saved_line_;
+  RegisterLineArenaUniquePtr saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
@@ -873,11 +868,6 @@
 
   friend class art::Thread;
 
-  // Map of dex pcs of invocations of java.lang.String.<init> to the set of other registers that
-  // contain the uninitialized this pointer to that invoke. Will contain no entry if there are
-  // no other registers.
-  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
-
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 330c06a..29d87c4 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -185,9 +185,12 @@
   }
 }
 
+inline size_t RegisterLine::ComputeSize(size_t num_regs) {
+  return OFFSETOF_MEMBER(RegisterLine, line_) + num_regs * sizeof(uint16_t);
+}
+
 inline RegisterLine* RegisterLine::Create(size_t num_regs, MethodVerifier* verifier) {
-  void* memory = verifier->GetArena().Alloc(OFFSETOF_MEMBER(RegisterLine, line_) +
-                                                (num_regs * sizeof(uint16_t)));
+  void* memory = verifier->GetArena().Alloc(ComputeSize(num_regs));
   return new (memory) RegisterLine(num_regs, verifier);
 }
 
@@ -200,6 +203,13 @@
   SetResultTypeToUnknown(verifier);
 }
 
+inline void RegisterLineArenaDelete::operator()(RegisterLine* ptr) const {
+  if (ptr != nullptr) {
+    ptr->~RegisterLine();
+    ProtectMemory(ptr, RegisterLine::ComputeSize(ptr->NumRegs()));
+  }
+}
+
 }  // namespace verifier
 }  // namespace art
 
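The two register_line-inl.h hunks above pin down the ownership story for register lines: Create() placement-news the object into the verifier's arena, sized by ComputeSize() to cover the trailing line_ array, and RegisterLineArenaDelete runs the destructor without freeing anything, because the arena owns the storage; ProtectMemory() then poisons the vacated bytes. A minimal stand-alone sketch of the same pattern, with hypothetical stand-ins (Line, Arena, LineArenaDelete are invented names, and memset approximates ProtectMemory):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>
  #include <memory>
  #include <new>
  #include <vector>

  // Object with a trailing uint16_t array, sized at allocation time.
  struct Line {
    explicit Line(size_t n) : num_regs_(n) {}
    size_t NumRegs() const { return num_regs_; }
    static size_t ComputeSize(size_t n) {
      // Header size up to the trailing array, plus n entries.
      return offsetof(Line, regs_) + n * sizeof(uint16_t);
    }
    size_t num_regs_;
    uint16_t regs_[1];  // Trailing storage; ART uses a zero-length array here.
  };

  // Toy bump arena: hands out blocks and frees them all at destruction.
  struct Arena {
    std::vector<std::unique_ptr<char[]>> blocks_;
    void* Alloc(size_t n) {
      blocks_.push_back(std::make_unique<char[]>(n));
      return blocks_.back().get();
    }
  };

  // Deleter that destroys but never frees: the arena owns the memory.
  struct LineArenaDelete {
    void operator()(Line* ptr) const {
      if (ptr != nullptr) {
        const size_t size = Line::ComputeSize(ptr->NumRegs());
        ptr->~Line();
        std::memset(ptr, 0xaa, size);  // Poison, standing in for ProtectMemory.
      }
    }
  };

  using LineUniquePtr = std::unique_ptr<Line, LineArenaDelete>;

  LineUniquePtr CreateLine(Arena* arena, size_t n) {
    void* memory = arena->Alloc(Line::ComputeSize(n));  // Placement-new target.
    return LineUniquePtr(new (memory) Line(n));
  }

Keeping the deleter free()-less lets std::unique_ptr manage object lifetime while all deallocation is deferred to arena teardown; the sketch computes the size before running the destructor so it never reads from a destroyed object.
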
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index b7cde99..82c371d 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -91,25 +91,14 @@
   return true;
 }
 
-void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                                         uint32_t this_reg, uint32_t dex_pc) {
+void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type) {
   DCHECK(uninit_type.IsUninitializedTypes());
-  bool is_string = !uninit_type.IsUnresolvedTypes() && uninit_type.GetClass()->IsStringClass();
   const RegType& init_type = verifier->GetRegTypeCache()->FromUninitialized(uninit_type);
   size_t changed = 0;
   for (uint32_t i = 0; i < num_regs_; i++) {
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
-      if (is_string && i != this_reg) {
-        auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
-        if (it != verifier->GetStringInitPcRegMap().end()) {
-          it->second.insert(i);
-        } else {
-          std::set<uint32_t> reg_set = { i };
-          verifier->GetStringInitPcRegMap().Put(dex_pc, reg_set);
-        }
-      }
     }
   }
   // Is this initializing "this"?
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index b2f5555..15ae202 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -99,11 +99,14 @@
   // available now. An example is sharpening types after a check-cast. Note that when given kKeep,
   // the new_type is dchecked to be a reference type.
   template <LockOp kLockOp>
-  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier, uint32_t vdst,
+  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier,
+                                     uint32_t vdst,
                                      const RegType& new_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetRegisterTypeWide(MethodVerifier* verifier, uint32_t vdst, const RegType& new_type1,
+  bool SetRegisterTypeWide(MethodVerifier* verifier,
+                           uint32_t vdst,
+                           const RegType& new_type1,
                            const RegType& new_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -117,11 +120,14 @@
   // Get the type of register vsrc.
   const RegType& GetRegisterType(MethodVerifier* verifier, uint32_t vsrc) const;
 
-  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
+  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier,
+                                        uint32_t vsrc,
                                         const RegType& check_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool VerifyRegisterTypeWide(MethodVerifier* verifier, uint32_t vsrc, const RegType& check_type1,
+  bool VerifyRegisterTypeWide(MethodVerifier* verifier,
+                              uint32_t vsrc,
+                              const RegType& check_type1,
                               const RegType& check_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -155,8 +161,7 @@
    * reference type. This is called when an appropriate constructor is invoked -- all copies of
    * the reference must be marked as initialized.
    */
-  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                             uint32_t this_reg, uint32_t dex_pc)
+  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -197,6 +202,9 @@
     return num_regs_;
   }
 
+  // Return how many bytes of memory a register line uses.
+  ALWAYS_INLINE static size_t ComputeSize(size_t num_regs);
+
   /*
    * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
    * caller can decide whether it needs the reference to be initialized or not. (Can also return
@@ -207,31 +215,42 @@
    * allow_failure will return Conflict() instead of causing a verification failure if there is an
    * error.
    */
-  const RegType& GetInvocationThis(MethodVerifier* verifier, const Instruction* inst,
-                                   bool is_range, bool allow_failure = false)
+  const RegType& GetInvocationThis(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   bool is_range,
+                                   bool allow_failure = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
    * Verify types for a simple two-register instruction (e.g. "neg-int").
    * "dst_type" is stored into vA, and "src_type" is verified against vB.
    */
-  void CheckUnaryOp(MethodVerifier* verifier, const Instruction* inst, const RegType& dst_type,
+  void CheckUnaryOp(MethodVerifier* verifier,
+                    const Instruction* inst,
+                    const RegType& dst_type,
                     const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                        const RegType& dst_type1, const RegType& dst_type2,
-                        const RegType& src_type1, const RegType& src_type2)
+  void CheckUnaryOpWide(MethodVerifier* verifier,
+                        const Instruction* inst,
+                        const RegType& dst_type1,
+                        const RegType& dst_type2,
+                        const RegType& src_type1,
+                        const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpToWide(MethodVerifier* verifier, const Instruction* inst,
-                          const RegType& dst_type1, const RegType& dst_type2,
+  void CheckUnaryOpToWide(MethodVerifier* verifier,
+                          const Instruction* inst,
+                          const RegType& dst_type1,
+                          const RegType& dst_type2,
                           const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpFromWide(MethodVerifier* verifier, const Instruction* inst,
+  void CheckUnaryOpFromWide(MethodVerifier* verifier,
+                            const Instruction* inst,
                             const RegType& dst_type,
-                            const RegType& src_type1, const RegType& src_type2)
+                            const RegType& src_type1,
+                            const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -239,19 +258,28 @@
    * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
    * against vB/vC.
    */
-  void CheckBinaryOp(MethodVerifier* verifier, const Instruction* inst,
-                     const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
+  void CheckBinaryOp(MethodVerifier* verifier,
+                     const Instruction* inst,
+                     const RegType& dst_type,
+                     const RegType& src_type1,
+                     const RegType& src_type2,
                      bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                         const RegType& dst_type1, const RegType& dst_type2,
-                         const RegType& src_type1_1, const RegType& src_type1_2,
-                         const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOpWide(MethodVerifier* verifier,
+                         const Instruction* inst,
+                         const RegType& dst_type1,
+                         const RegType& dst_type2,
+                         const RegType& src_type1_1,
+                         const RegType& src_type1_2,
+                         const RegType& src_type2_1,
+                         const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWideShift(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOpWideShift(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& long_lo_type,
+                              const RegType& long_hi_type,
                               const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -259,20 +287,28 @@
    * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
    * are verified against vA/vB, then "dst_type" is stored into vA.
    */
-  void CheckBinaryOp2addr(MethodVerifier* verifier, const Instruction* inst,
+  void CheckBinaryOp2addr(MethodVerifier* verifier,
+                          const Instruction* inst,
                           const RegType& dst_type,
-                          const RegType& src_type1, const RegType& src_type2,
+                          const RegType& src_type1,
+                          const RegType& src_type2,
                           bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWide(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& dst_type1, const RegType& dst_type2,
-                              const RegType& src_type1_1, const RegType& src_type1_2,
-                              const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOp2addrWide(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& dst_type1,
+                              const RegType& dst_type2,
+                              const RegType& src_type1_1,
+                              const RegType& src_type1_2,
+                              const RegType& src_type2_1,
+                              const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier, const Instruction* inst,
-                                   const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   const RegType& long_lo_type,
+                                   const RegType& long_hi_type,
                                    const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -282,9 +318,12 @@
    *
    * If "check_boolean_op" is set, we use the constant value in vC.
    */
-  void CheckLiteralOp(MethodVerifier* verifier, const Instruction* inst,
-                      const RegType& dst_type, const RegType& src_type,
-                      bool check_boolean_op, bool is_lit16)
+  void CheckLiteralOp(MethodVerifier* verifier,
+                      const Instruction* inst,
+                      const RegType& dst_type,
+                      const RegType& src_type,
+                      bool check_boolean_op,
+                      bool is_lit16)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
@@ -401,6 +440,11 @@
   DISALLOW_COPY_AND_ASSIGN(RegisterLine);
 };
 
+class RegisterLineArenaDelete : public ArenaDelete<RegisterLine> {
+ public:
+  void operator()(RegisterLine* ptr) const;
+};
+
 }  // namespace verifier
 }  // namespace art
 
diff --git a/test/003-omnibus-opcodes/build b/test/003-omnibus-opcodes/build
index faa2983..56e8784 100644
--- a/test/003-omnibus-opcodes/build
+++ b/test/003-omnibus-opcodes/build
@@ -23,8 +23,8 @@
 ${JAVAC} -d classes `find src2 -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
fi
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index be7888b..7045482 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -639,3 +639,23 @@
 extern "C" JNIEXPORT jlong JNICALL Java_Main_testGetMethodID(JNIEnv* env, jclass, jclass c) {
   return reinterpret_cast<jlong>(env->GetMethodID(c, "a", "()V"));
 }
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enterJniCriticalSection(JNIEnv* env, jclass,
+                                                                    jint arraySize,
+                                                                    jbyteArray array0,
+                                                                    jbyteArray array1) {
+  for (int i = 0; i < 50000; ++i) {
+    char* data0 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array0, nullptr));
+    char* data1 = reinterpret_cast<char*>(env->GetPrimitiveArrayCritical(array1, nullptr));
+    bool up = i % 2 == 0;
+    for (int j = 0; j < arraySize; ++j) {
+      if (up) {
+        data1[j] = data0[j] + 1;
+      } else {
+        data0[j] = data1[j] + 1;
+      }
+    }
+    env->ReleasePrimitiveArrayCritical(array1, data1, 0);
+    env->ReleasePrimitiveArrayCritical(array0, data0, 0);
+  }
+}
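
The native half of the new test deliberately holds two primitive-array critical sections at once, over many iterations, so a concurrently running collector must cope with threads parked inside critical regions. Outside a stress test the contract is narrower: between GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical the code must not call other JNI functions or block, the returned pointer needs a null check, and the release mode decides whether a possible copy is written back. A hedged sketch of that contract (Java_Main_sumArray is a hypothetical function, not part of this test):

  #include <jni.h>

  extern "C" JNIEXPORT jlong JNICALL
  Java_Main_sumArray(JNIEnv* env, jclass, jbyteArray array) {
    // Query the length first: no other JNI calls are allowed inside the
    // critical region.
    const jsize len = env->GetArrayLength(array);
    jbyte* data =
        static_cast<jbyte*>(env->GetPrimitiveArrayCritical(array, nullptr));
    if (data == nullptr) {
      return 0;  // Allocation failed; the VM has set a pending exception.
    }
    jlong sum = 0;
    for (jsize i = 0; i < len; ++i) {
      sum += data[i];
    }
    // JNI_ABORT skips copy-back since nothing was written; the test above
    // writes through both arrays, so it must release with mode 0.
    env->ReleasePrimitiveArrayCritical(array, data, JNI_ABORT);
    return sum;
  }
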
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index ee3a3b9..5c39ede 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -38,6 +38,7 @@
         testNewStringObject();
         testRemoveLocalObject();
         testProxyGetMethodID();
+        testJniCriticalSectionAndGc();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -222,6 +223,35 @@
     }
 
     private static native long testGetMethodID(Class<?> c);
+
+    // Exercise GC and JNI critical sections in parallel.
+    private static void testJniCriticalSectionAndGc() {
+        Thread runGcThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                for (int i = 0; i < 10; ++i) {
+                    Runtime.getRuntime().gc();
+                }
+            }
+        });
+        Thread jniCriticalThread = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                final int arraySize = 32;
+                byte[] array0 = new byte[arraySize];
+                byte[] array1 = new byte[arraySize];
+                enterJniCriticalSection(arraySize, array0, array1);
+            }
+        });
+        jniCriticalThread.start();
+        runGcThread.start();
+        try {
+            jniCriticalThread.join();
+            runGcThread.join();
+        } catch (InterruptedException ignored) {}
+    }
+
+    private static native void enterJniCriticalSection(int arraySize, byte[] array0, byte[] array1);
 }
 
 class JniCallNonvirtualTest {
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 057b351..93bee50 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -29,8 +29,8 @@
 rm 'classes/android/test/anno/ClassWithInnerAnnotationClass$MissingInnerAnnotationClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/022-interface/build b/test/022-interface/build
index 3f8915c..5cfc7f2 100644
--- a/test/022-interface/build
+++ b/test/022-interface/build
@@ -20,8 +20,8 @@
# Use classes compiled with ecj, which exposes an invokeinterface
# issue when interfaces override methods in Object.
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index af25d9b..e5c9dba 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -804,6 +804,7 @@
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -825,6 +826,7 @@
     Assert.assertEquals(Math.round(-2.5f), -2);
     Assert.assertEquals(Math.round(-2.9f), -3);
     Assert.assertEquals(Math.round(-3.0f), -3);
+    Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
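
The two added assertions sit exactly at the precision limits of the types: above 2^52 adjacent doubles are one unit apart, and above 2^23 adjacent floats are, so at 2^53 - 1 and 2^24 - 1 the value x + 0.5 is not representable and rounds up under round-to-nearest-even. A Math.round intrinsic lowered naively as floor(x + 0.5) is therefore off by one precisely at these inputs, which is what the tests pin down. An illustrative C++ sketch of the failure mode (not ART's intrinsic code):

  #include <cassert>
  #include <cmath>
  #include <cstdint>

  // Naive rounding: correct for small values, wrong near 2^53.
  int64_t naive_round(double x) {
    return static_cast<int64_t>(std::floor(x + 0.5));
  }

  int main() {
    const double x = 9007199254740991.0;  // 2^53 - 1
    // x + 0.5 lands halfway between 2^53 - 1 and 2^53 and ties to even.
    assert(naive_round(x) == 9007199254740992);  // Off by one.
    // Correct under the default round-to-nearest mode.
    assert(static_cast<int64_t>(std::nearbyint(x)) == 9007199254740991);

    const float f = 16777215.0f;  // 2^24 - 1
    assert(static_cast<int32_t>(std::floor(f + 0.5f)) == 16777216);  // Same bug.
    return 0;
  }
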
diff --git a/test/085-old-style-inner-class/build b/test/085-old-style-inner-class/build
index 6f50a76..21dc662 100644
--- a/test/085-old-style-inner-class/build
+++ b/test/085-old-style-inner-class/build
@@ -23,8 +23,8 @@
 ${JAVAC} -source 1.4 -target 1.4 -d classes `find src -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   # Suppress stderr to keep the inner class warnings out of the expected output.
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes 2>/dev/null
diff --git a/test/091-override-package-private-method/build b/test/091-override-package-private-method/build
index 5a340dc..073a4ba 100755
--- a/test/091-override-package-private-method/build
+++ b/test/091-override-package-private-method/build
@@ -24,14 +24,12 @@
 mv classes/OverridePackagePrivateMethodSuper.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/097-duplicate-method/build b/test/097-duplicate-method/build
index a855873..4525549 100644
--- a/test/097-duplicate-method/build
+++ b/test/097-duplicate-method/build
@@ -23,10 +23,10 @@
   ${JACK} --output-jack src.jack src
 
   ${JASMIN} -d classes src/*.j
-  ${JILL} classes --output jasmin.jack
+  jar cf jasmin.jill.jar -C classes .
 
   # We set jack.import.type.policy=keep-first to consider class definitions from jasmin first.
-  ${JACK} --import jasmin.jack --import src.jack -D jack.import.type.policy=keep-first --output-dex .
+  ${JACK} --import jasmin.jill.jar --import src.jack -D jack.import.type.policy=keep-first --output-dex .
 else
   ${JAVAC} -d classes src/*.java
   ${JASMIN} -d classes src/*.j
diff --git a/test/111-unresolvable-exception/build b/test/111-unresolvable-exception/build
index e772fb8..58ac26d 100644
--- a/test/111-unresolvable-exception/build
+++ b/test/111-unresolvable-exception/build
@@ -22,8 +22,8 @@
 rm classes/TestException.class
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/113-multidex/build b/test/113-multidex/build
index 8ef5c0e..4557ccd 100644
--- a/test/113-multidex/build
+++ b/test/113-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Second.class classes2/FillerA.class classes2/FillerB.class classes2/Inf*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index b003307..852ec2e 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -1,4 +1,3 @@
-Code cache exists: './code_cache'.
 Native bridge initialized.
 Checking for getEnvValues.
 Ready for native bridge tests.
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index b70ca4f..aca356b 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -269,16 +269,12 @@
   struct stat st;
   if (app_code_cache_dir != nullptr) {
     if (stat(app_code_cache_dir, &st) == 0) {
-      if (S_ISDIR(st.st_mode)) {
-        printf("Code cache exists: '%s'.\n", app_code_cache_dir);
-      } else {
+      if (!S_ISDIR(st.st_mode)) {
         printf("Code cache is not a directory.\n");
       }
     } else {
       perror("Error when stat-ing the code_cache:");
     }
-  } else {
-    printf("app_code_cache_dir is null.\n");
   }
 
   if (art_cbs != nullptr) {
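
After this change the bridge's preparation hook reports only the failure cases — a code cache path that is not a directory, or a failing stat() — and stays silent on success and on a null path, which is why the "Code cache exists" line disappears from expected.txt above. The surviving logic is the standard stat/S_ISDIR check; a stand-alone sketch (CheckCodeCacheDir is a hypothetical helper, not the test's actual hook):

  #include <sys/stat.h>

  #include <cstdio>

  static void CheckCodeCacheDir(const char* app_code_cache_dir) {
    if (app_code_cache_dir == nullptr) {
      return;  // A null path is now accepted silently.
    }
    struct stat st;
    if (stat(app_code_cache_dir, &st) != 0) {
      perror("Error when stat-ing the code_cache");  // perror appends ": <errno text>".
    } else if (!S_ISDIR(st.st_mode)) {
      std::printf("Code cache is not a directory.\n");
    }
  }
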
diff --git a/test/121-modifiers/build b/test/121-modifiers/build
index 85b69e9..771dd51 100644
--- a/test/121-modifiers/build
+++ b/test/121-modifiers/build
@@ -31,9 +31,9 @@
 # mv Main.class A.class A\$B.class A\$C.class classes/
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
+  jar cf classes.jill.jar -C classes .
   # Workaround b/19561685: disable sanity checks to produce a DEX file with invalid modifiers.
-  ${JACK} --sanity-checks off --import classes.jack --output-dex .
+  ${JACK} --sanity-checks off --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/124-missing-classes/build b/test/124-missing-classes/build
index b92ecf9..0a340a2 100644
--- a/test/124-missing-classes/build
+++ b/test/124-missing-classes/build
@@ -27,8 +27,8 @@
 rm 'classes/Main$MissingInnerClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/126-miranda-multidex/build b/test/126-miranda-multidex/build
index b7f2118..00b9ba0 100644
--- a/test/126-miranda-multidex/build
+++ b/test/126-miranda-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Main.class classes2/MirandaAbstract.class classes2/MirandaClass*.class classes2/MirandaInterface2*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/127-checker-secondarydex/build b/test/127-checker-secondarydex/build
index 0d9f4d6..7ce46ac 100755
--- a/test/127-checker-secondarydex/build
+++ b/test/127-checker-secondarydex/build
@@ -24,14 +24,12 @@
 mv classes/Super.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/130-hprof/src-ex/Allocator.java b/test/130-hprof/src-ex/Allocator.java
new file mode 100644
index 0000000..ee75a14
--- /dev/null
+++ b/test/130-hprof/src-ex/Allocator.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple allocator that returns a boot class path object.
+public class Allocator {
+    public static Object allocObject() {
+        return new Object();
+    }
+}
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 67e5232..9868c61 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.io.File;
 import java.lang.ref.WeakReference;
+import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 import java.lang.reflect.InvocationTargetException;
 
@@ -34,24 +35,21 @@
         }
     }
 
-    public static void main(String[] args) {
-        // Create some data.
-        Object data[] = new Object[TEST_LENGTH];
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                data[i] = new Object[TEST_LENGTH];
-            } else {
-                data[i] = String.valueOf(i);
-            }
+    private static Object allocInDifferentLoader() throws Exception {
+        final String DEX_FILE = System.getenv("DEX_LOCATION") + "/130-hprof-ex.jar";
+        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        if (pathClassLoader == null) {
+            throw new AssertionError("Couldn't find path class loader class");
         }
-        for (int i = 0; i < data.length; i++) {
-            if (makeArray(i)) {
-                Object data2[] = (Object[]) data[i];
-                fillArray(data, data2, i);
-            }
-        }
-        System.out.println("Generated data.");
+        Constructor constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        ClassLoader loader = (ClassLoader)constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class allocator = loader.loadClass("Allocator");
+        return allocator.getDeclaredMethod("allocObject").invoke(null);
+    }
 
+    private static void createDumpAndConv() throws RuntimeException {
         File dumpFile = null;
         File convFile = null;
 
@@ -88,6 +86,43 @@
         }
     }
 
+    public static void main(String[] args) throws Exception {
+        // Create some data.
+        Object data[] = new Object[TEST_LENGTH];
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                data[i] = new Object[TEST_LENGTH];
+            } else {
+                data[i] = String.valueOf(i);
+            }
+        }
+        for (int i = 0; i < data.length; i++) {
+            if (makeArray(i)) {
+                Object data2[] = (Object[]) data[i];
+                fillArray(data, data2, i);
+            }
+        }
+        System.out.println("Generated data.");
+
+        createDumpAndConv();
+        Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+        if (klass == null) {
+            throw new AssertionError("Couldn't find DdmVmInternal class");
+        }
+        Method enableMethod = klass.getDeclaredMethod("enableRecentAllocations",
+                Boolean.TYPE);
+        if (enableMethod == null) {
+            throw new AssertionError("Couldn't find enableRecentAllocations method");
+        }
+        enableMethod.invoke(null, true);
+        Object o = allocInDifferentLoader();
+        // Run GC to cause class unloading.
+        Runtime.getRuntime().gc();
+        createDumpAndConv();
+        // TODO: Somehow check contents of hprof file.
+        enableMethod.invoke(null, false);
+    }
+
     private static File getHprofConf() {
         // Use the java.library.path. It points to the lib directory.
         File libDir = new File(System.getProperty("java.library.path"));
diff --git a/test/442-checker-constant-folding/smali/TestCmp.smali b/test/442-checker-constant-folding/smali/TestCmp.smali
new file mode 100644
index 0000000..df631bc
--- /dev/null
+++ b/test/442-checker-constant-folding/smali/TestCmp.smali
@@ -0,0 +1,332 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCmp;
+
+.super Ljava/lang/Object;
+
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (before)
+## CHECK-DAG:     <<Const13:j\d+>>  LongConstant 13
+## CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const13>>,<<Const7>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-DAG:                       LongConstant 13
+## CHECK-DAG:                       LongConstant 7
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongConstants()I
+   .registers 5
+   const-wide v1, 13
+   const-wide v3, 7
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const11:f\d+>>  FloatConstant 11
+## CHECK-DAG:     <<Const22:f\d+>>  FloatConstant 22
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const11>>,<<Const22>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 11
+## CHECK-DAG:                       FloatConstant 22
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstants()I
+   .registers 3
+   const v1, 11.f
+   const v2, 22.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const33>>,<<Const44>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 33
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstants()I
+   .registers 3
+   const v1, 33.f
+   const v2, 44.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const55:d\d+>>  DoubleConstant 55
+## CHECK-DAG:     <<Const66:d\d+>>  DoubleConstant 66
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const55>>,<<Const66>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 55
+## CHECK-DAG:                       DoubleConstant 66
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstants()I
+   .registers 5
+   const-wide v1, 55.
+   const-wide v3, 66.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const77:d\d+>>  DoubleConstant 77
+## CHECK-DAG:     <<Const88:d\d+>>  DoubleConstant 88
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const77>>,<<Const88>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 77
+## CHECK-DAG:                       DoubleConstant 88
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstants()I
+   .registers 5
+   const-wide v1, 77.
+   const-wide v3, 88.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const100:j\d+>> LongConstant 100
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const100>>,<<Const100>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-DAG:                       LongConstant 100
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongSameConstant()I
+   .registers 5
+   const-wide v1, 100
+   const-wide v3, 100
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const200:f\d+>> FloatConstant 200
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const200>>,<<Const200>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 200
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatSameConstant()I
+   .registers 3
+   const v1, 200.f
+   const v2, 200.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const300:f\d+>> FloatConstant 300
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const300>>,<<Const300>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 300
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatSameConstant()I
+   .registers 3
+   const v1, 300.f
+   const v2, 300.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const400:d\d+>> DoubleConstant 400
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const400>>,<<Const400>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 400
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 400.
+   const-wide v3, 400.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const500:d\d+>> DoubleConstant 500
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const500>>,<<Const500>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 500
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 500.
+   const-wide v3, 500.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const45:d\d+>>  DoubleConstant 45
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const45>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 45
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 45.
+   const-wide v3, NaN
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const46:d\d+>>  DoubleConstant 46
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const46>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 46
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 46.
+   const-wide v3, NaN
+   cmpl-double v0, v1, v3
+   return v0
+.end method
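
The NaN methods at the end of the file encode the dex-level bias rules that constant folding has to preserve: cmpg-float and cmpg-double ("gt" bias) produce 1 when either operand is NaN, while cmpl-float and cmpl-double ("lt" bias) produce -1; for ordered operands all variants agree with the usual -1/0/1 comparison, and comparing a constant with itself folds to 0. A small model of the expected folding, offered as a sketch rather than the compiler's actual code:

  #include <cassert>
  #include <cmath>

  enum class Bias { kGt, kLt };

  // Result a constant folder should substitute for Compare(lhs, rhs) bias:*.
  int FoldFpCompare(double lhs, double rhs, Bias bias) {
    if (std::isnan(lhs) || std::isnan(rhs)) {
      return bias == Bias::kGt ? 1 : -1;  // cmpg-* -> 1, cmpl-* -> -1 on NaN.
    }
    if (lhs < rhs) return -1;
    if (lhs > rhs) return 1;
    return 0;
  }

  int main() {
    assert(FoldFpCompare(44.0, NAN, Bias::kGt) == 1);    // $opt$CmpGtFloatConstantWithNaN
    assert(FoldFpCompare(44.0, NAN, Bias::kLt) == -1);   // $opt$CmpLtFloatConstantWithNaN
    assert(FoldFpCompare(11.0, 22.0, Bias::kGt) == -1);  // $opt$CmpGtFloatConstants
    assert(FoldFpCompare(100.0, 100.0, Bias::kLt) == 0); // Same-constant cases fold to 0.
    return 0;
  }
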
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 5479818..93fe397 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 public class Main {
 
+  // Workaround for b/18051191.
+  class InnerClass {}
+
   public static void assertFalse(boolean condition) {
     if (condition) {
       throw new Error();
@@ -47,6 +52,68 @@
   }
 
 
+  // Wrappers around methods located in file TestCmp.smali.
+
+  public int smaliCmpLongConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpLongSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpGtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+
+
   /**
    * Exercise constant folding on negation.
    */
@@ -89,6 +156,44 @@
     return y;
   }
 
+  /// CHECK-START: float Main.FloatNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Neg:f\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:f\d+>> FloatConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static float FloatNegation() {
+    float x, y;
+    x = 42F;
+    y = -x;
+    return y;
+  }
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:     <<Neg:d\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:d\d+>> DoubleConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static double DoubleNegation() {
+    double x, y;
+    x = 42D;
+    y = -x;
+    return y;
+  }
+
 
   /**
    * Exercise constant folding on addition.
@@ -166,6 +271,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatAddition() constant_folding (before)
+  /// CHECK-DAG:     <<Const1:f\d+>>  FloatConstant 1
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Add:f\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding (after)
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding (after)
+  /// CHECK-NOT:                      Add
+
+  public static float FloatAddition() {
+    float a, b, c;
+    a = 1F;
+    b = 2F;
+    c = a + b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (before)
+  /// CHECK-DAG:     <<Const1:d\d+>>  DoubleConstant 1
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Add:d\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (after)
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (after)
+  /// CHECK-NOT:                      Add
+
+  public static double DoubleAddition() {
+    double a, b, c;
+    a = 1D;
+    b = 2D;
+    c = a + b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on subtraction.
@@ -213,6 +360,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (before)
+  /// CHECK-DAG:     <<Const6:f\d+>>  FloatConstant 6
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Sub:f\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (after)
+  /// CHECK-DAG:     <<Const4:f\d+>>  FloatConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (after)
+  /// CHECK-NOT:                      Sub
+
+  public static float FloatSubtraction() {
+    float a, b, c;
+    a = 6F;
+    b = 2F;
+    c = a - b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (before)
+  /// CHECK-DAG:     <<Const6:d\d+>>  DoubleConstant 6
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Sub:d\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (after)
+  /// CHECK-DAG:     <<Const4:d\d+>>  DoubleConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (after)
+  /// CHECK-NOT:                      Sub
+
+  public static double DoubleSubtraction() {
+    double a, b, c;
+    a = 6D;
+    b = 2D;
+    c = a - b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on multiplication.
@@ -260,6 +449,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (before)
+  /// CHECK-DAG:     <<Const7:f\d+>>  FloatConstant 7
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:     <<Mul:f\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (after)
+  /// CHECK-DAG:     <<Const21:f\d+>> FloatConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (after)
+  /// CHECK-NOT:                      Mul
+
+  public static float FloatMultiplication() {
+    float a, b, c;
+    a = 7F;
+    b = 3F;
+    c = a * b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (before)
+  /// CHECK-DAG:     <<Const7:d\d+>>  DoubleConstant 7
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:     <<Mul:d\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (after)
+  /// CHECK-DAG:     <<Const21:d\d+>> DoubleConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (after)
+  /// CHECK-NOT:                      Mul
+
+  public static double DoubleMultiplication() {
+    double a, b, c;
+    a = 7D;
+    b = 3D;
+    c = a * b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on division.
@@ -311,6 +542,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatDivision() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Div:f\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding (after)
+  /// CHECK-DAG:     <<Const3P2:f\d+>> FloatConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding (after)
+  /// CHECK-NOT:                       Div
+
+  public static float FloatDivision() {
+    float a, b, c;
+    a = 8F;
+    b = 2.5F;
+    c = a / b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Div:d\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (after)
+  /// CHECK-DAG:     <<Const3P2:d\d+>> DoubleConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (after)
+  /// CHECK-NOT:                       Div
+
+  public static double DoubleDivision() {
+    double a, b, c;
+    a = 8D;
+    b = 2.5D;
+    c = a / b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on remainder.
@@ -362,6 +635,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Rem:f\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (after)
+  /// CHECK-DAG:     <<Const0P5:f\d+>> FloatConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (after)
+  /// CHECK-NOT:                       Rem
+
+  public static float FloatRemainder() {
+    float a, b, c;
+    a = 8F;
+    b = 2.5F;
+    c = a % b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Rem:d\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (after)
+  /// CHECK-DAG:     <<Const0P5:d\d+>> DoubleConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (after)
+  /// CHECK-NOT:                       Rem
+
+  public static double DoubleRemainder() {
+    double a, b, c;
+    a = 8D;
+    b = 2.5D;
+    c = a % b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on left shift.
@@ -1197,25 +1512,37 @@
   }
 
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws Exception {
     assertIntEquals(-42, IntNegation());
     assertLongEquals(-42L, LongNegation());
+    assertFloatEquals(-42F, FloatNegation());
+    assertDoubleEquals(-42D, DoubleNegation());
 
     assertIntEquals(3, IntAddition1());
     assertIntEquals(14, IntAddition2());
     assertLongEquals(3L, LongAddition());
+    assertFloatEquals(3F, FloatAddition());
+    assertDoubleEquals(3D, DoubleAddition());
 
     assertIntEquals(4, IntSubtraction());
     assertLongEquals(4L, LongSubtraction());
+    assertFloatEquals(4F, FloatSubtraction());
+    assertDoubleEquals(4D, DoubleSubtraction());
 
     assertIntEquals(21, IntMultiplication());
     assertLongEquals(21L, LongMultiplication());
+    assertFloatEquals(21F, FloatMultiplication());
+    assertDoubleEquals(21D, DoubleMultiplication());
 
     assertIntEquals(2, IntDivision());
     assertLongEquals(2L, LongDivision());
+    assertFloatEquals(3.2F, FloatDivision());
+    assertDoubleEquals(3.2D, DoubleDivision());
 
     assertIntEquals(2, IntRemainder());
     assertLongEquals(2L, LongRemainder());
+    assertFloatEquals(0.5F, FloatRemainder());
+    assertDoubleEquals(0.5D, DoubleRemainder());
 
     assertIntEquals(4, ShlIntLong());
     assertLongEquals(12L, ShlLongInt());
@@ -1259,6 +1586,24 @@
     assertFalse(CmpFloatGreaterThanNaN(arbitrary));
     assertFalse(CmpDoubleLessThanNaN(arbitrary));
 
+    Main main = new Main();
+    assertIntEquals(1, main.smaliCmpLongConstants());
+    assertIntEquals(-1, main.smaliCmpGtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpGtDoubleConstants());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstants());
+
+    assertIntEquals(0, main.smaliCmpLongSameConstant());
+    assertIntEquals(0, main.smaliCmpGtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpLtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpGtDoubleSameConstant());
+    assertIntEquals(0, main.smaliCmpLtDoubleSameConstant());
+
+    assertIntEquals(1, main.smaliCmpGtFloatConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstantWithNaN());
+    assertIntEquals(1, main.smaliCmpGtDoubleConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstantWithNaN());
+
     assertIntEquals(33, ReturnInt33());
     assertIntEquals(2147483647, ReturnIntMax());
     assertIntEquals(0, ReturnInt0());
@@ -1275,4 +1620,10 @@
     assertDoubleEquals(34, ReturnDouble34());
     assertDoubleEquals(99.25, ReturnDouble99P25());
   }
+
+  Main() throws ClassNotFoundException {
+    testCmp = Class.forName("TestCmp");
+  }
+
+  private Class<?> testCmp;
 }
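
The floating-point constant-folding expectations above follow directly from ordinary Java arithmetic; a quick standalone check (a hypothetical snippet, not part of the test suite) reproduces the folded values the checker assertions expect:

    // Plain Java evaluation of the expressions that constant_folding is
    // expected to compute at compile time.
    public class FoldedValues {
      public static void main(String[] args) {
        System.out.println(7F * 3F);    // 21.0
        System.out.println(8F / 2.5F);  // 3.2
        System.out.println(8F % 2.5F);  // 0.5 (8 - 3 * 2.5)
        System.out.println(8D % 2.5D);  // 0.5
      }
    }
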
diff --git a/test/529-checker-unresolved/build b/test/529-checker-unresolved/build
index 8c3c4f8..d85035b 100644
--- a/test/529-checker-unresolved/build
+++ b/test/529-checker-unresolved/build
@@ -29,14 +29,12 @@
 mv classes/UnresolvedSuperClass.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
index 3a1b3fc..e215ab0 100644
--- a/test/566-checker-codegen-select/src/Main.java
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -20,13 +20,6 @@
   /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK-NEXT:                  Select [{{j\d+}},{{j\d+}},<<Cond>>]
 
-  // Condition must be materialized on X86 because it would need too many
-  // registers otherwise.
-  /// CHECK-START-X86: long Main.$noinline$longSelect(long) disassembly (after)
-  /// CHECK:             LessThanOrEqual
-  /// CHECK-NEXT:          cmp
-  /// CHECK:             Select
-
   public long $noinline$longSelect(long param) {
     if (doThrow) { throw new Error(); }
     long val_true = longB;
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 0fffdfd..09e97ea 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -41,7 +41,8 @@
         (m_name.compare("$noinline$returnDouble") == 0) ||
         (m_name.compare("$noinline$returnLong") == 0) ||
         (m_name.compare("$noinline$deopt") == 0) ||
-        (m_name.compare("$noinline$inlineCache") == 0)) {
+        (m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       const OatQuickMethodHeader* header =
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
@@ -71,6 +72,10 @@
 }
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInInterpreter(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    // Return false so callers skip the JIT-specific part of the test.
+    return false;
+  }
   ScopedObjectAccess soa(Thread::Current());
   OsrVisitor visitor(soa.Self());
   visitor.WalkStack();
@@ -87,7 +92,8 @@
     ArtMethod* m = GetMethod();
     std::string m_name(m->GetName());
 
-    if (m_name.compare("$noinline$inlineCache") == 0) {
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
       return false;
     }
@@ -96,6 +102,9 @@
 };
 
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasProfilingInfo(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    return;
+  }
   ScopedObjectAccess soa(Thread::Current());
   ProfilingInfoVisitor visitor(soa.Self());
   visitor.WalkStack();
@@ -112,7 +121,8 @@
     std::string m_name(m->GetName());
 
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if (m_name.compare("$noinline$inlineCache") == 0 && jit != nullptr) {
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
         // Sleep to yield to the compiler thread.
         sleep(0);
@@ -126,6 +136,9 @@
 };
 
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasOsrCode(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    return;
+  }
   ScopedObjectAccess soa(Thread::Current());
   OsrCheckVisitor visitor(soa.Self());
   visitor.WalkStack();
diff --git a/test/570-checker-osr/run b/test/570-checker-osr/run
new file mode 100755
index 0000000..24d69b4
--- /dev/null
+++ b/test/570-checker-osr/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure this test is not subject to code collection.
+exec ${RUN} "$@" --runtime-option -Xjitinitialsize:32M
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 828908a..1142d49 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -40,6 +40,9 @@
     if ($noinline$inlineCache(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
       throw new Error("Unexpected return value");
     }
+
+    $noinline$stackOverflow(new Main(), /* isSecondInvocation */ false);
+    $noinline$stackOverflow(new SubMain(), /* isSecondInvocation */ true);
   }
 
   public static int $noinline$returnInt() {
@@ -129,7 +132,32 @@
     return Main.class;
   }
 
-  public static int[] array = new int[4];
+  public void otherInlineCache() {
+    return;
+  }
+
+  public static void $noinline$stackOverflow(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this
+    // method already JITted, just return.
+    if (!ensureInInterpreter()) {
+      return;
+    }
+
+    // We need a ProfilingInfo object to populate the 'otherInlineCache' call.
+    ensureHasProfilingInfo();
+
+    if (isSecondInvocation) {
+      // Ensure we have an OSR code and we jump to it.
+      while (!ensureInOsrCode()) {}
+    }
+
+    for (int i = 0; i < (isSecondInvocation ? 10000000 : 1); ++i) {
+      // The first invocation of $noinline$stackOverflow will populate the inline
+      // cache with Main. The second invocation of the method will see a SubMain
+      // and will therefore trigger deoptimization.
+      m.otherInlineCache();
+    }
+  }
 
   public static native boolean ensureInInterpreter();
   public static native boolean ensureInOsrCode();
@@ -147,4 +175,8 @@
   public Main inlineCache() {
     return new SubMain();
   }
+
+  public void otherInlineCache() {
+    return;
+  }
 }
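
In plain terms, the deoptimization in $noinline$stackOverflow hinges on the receiver type changing between the two invocations. A minimal sketch of the speculation the inline cache encodes (a hypothetical helper for illustration, not ART's actual generated code):

    // Code specialized for a Main receiver must bail out when the receiver
    // turns out to be a SubMain; in ART this is the point where the OSR
    // frame deoptimizes back to the interpreter.
    static void specializedCallSite(Main m) {
      if (m.getClass() == Main.class) {
        // fast path: inlined Main.otherInlineCache() body (empty)
      } else {
        m.otherInlineCache();  // speculation failed: fall back to virtual dispatch
      }
    }
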
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 8a4cf60..59741d6 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -29,6 +29,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
     return cond ? x : y;
   }
@@ -46,6 +51,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarCst(boolean cond, int x) {
     return cond ? x : 1;
   }
@@ -63,6 +73,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntCstVar(boolean cond, int y) {
     return cond ? 1 : y;
   }
@@ -80,6 +95,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
     return cond ? x : y;
   }
@@ -97,6 +118,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarCst(boolean cond, long x) {
     return cond ? x : 1L;
   }
@@ -114,6 +141,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongCstVar(boolean cond, long y) {
     return cond ? 1L : y;
   }
@@ -168,6 +201,11 @@
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
     return a > b ? x : y;
   }
@@ -189,6 +227,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
@@ -208,6 +251,12 @@
   /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ngq
 
+  /// CHECK-START-X86: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+
   public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
     return a > b ? x : y;
   }
@@ -232,6 +281,15 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
     long result = (a > b ? x : y);
     return result + (a > b ? 0L : 1L);
diff --git a/test/574-irreducible-and-constant-area/expected.txt b/test/574-irreducible-and-constant-area/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/expected.txt
diff --git a/test/574-irreducible-and-constant-area/info.txt b/test/574-irreducible-and-constant-area/info.txt
new file mode 100644
index 0000000..e957a5a
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/info.txt
@@ -0,0 +1,3 @@
+Regression test for intrinsics on x86, which used to wrongly assume that
+a HInvokeStaticOrDirect must have a special input (an assumption that does
+not hold for irreducible loops).
diff --git a/test/574-irreducible-and-constant-area/run b/test/574-irreducible-and-constant-area/run
new file mode 100755
index 0000000..ffdbcc9
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Don't do relocation, as this affects this test.
+exec ${RUN} "$@" --no-relocate
diff --git a/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..d7d4346
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+.method public static simpleLoop(I)I
+   .registers 5
+   const/16 v0, 42
+   const/16 v1, 42
+   const-wide/high16 v2, 0x4000000000000000L
+   if-eq p0, v0, :other_loop_entry
+   :loop_entry
+   invoke-static {v1, v1}, LMain;->$inline$foo(FF)V
+   invoke-static {v2, v3, v2, v3}, LMain;->$inline$foo(DD)V
+   if-ne p0, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
diff --git a/test/574-irreducible-and-constant-area/src/Main.java b/test/574-irreducible-and-constant-area/src/Main.java
new file mode 100644
index 0000000..3cdd924
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    m.invoke(null, arguments);
+  }
+
+  public static void $inline$foo(float a, float b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+
+  public static void $inline$foo(double a, double b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+}
diff --git a/test/575-checker-isnan/expected.txt b/test/575-checker-isnan/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/575-checker-isnan/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/575-checker-isnan/info.txt b/test/575-checker-isnan/info.txt
new file mode 100644
index 0000000..5c48a6a
--- /dev/null
+++ b/test/575-checker-isnan/info.txt
@@ -0,0 +1 @@
+Unit test for float/double isNaN() operation.
diff --git a/test/575-checker-isnan/src/Main.java b/test/575-checker-isnan/src/Main.java
new file mode 100644
index 0000000..cc71e5e
--- /dev/null
+++ b/test/575-checker-isnan/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN32(float x) {
+    return Float.isNaN(x);
+  }
+
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN64(double x) {
+    return Double.isNaN(x);
+  }
+
+  public static void main(String[] args) {
+    // A few distinct numbers.
+    expectFalse(isNaN32(Float.NEGATIVE_INFINITY));
+    expectFalse(isNaN32(-1.0f));
+    expectFalse(isNaN32(-0.0f));
+    expectFalse(isNaN32(0.0f));
+    expectFalse(isNaN32(1.0f));
+    expectFalse(isNaN32(Float.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00000001)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80000001)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN32(Float.NaN));
+    expectTrue(isNaN32(0.0f / 0.0f));
+    expectTrue(isNaN32((float)Math.sqrt(-1.0f)));
+    float[] fvals = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectTrue(isNaN32(fvals[i]));
+    }
+
+    // A few distinct numbers.
+    expectFalse(isNaN64(Double.NEGATIVE_INFINITY));
+    expectFalse(isNaN64(-1.0d));
+    expectFalse(isNaN64(-0.0d));
+    expectFalse(isNaN64(0.0d));
+    expectFalse(isNaN64(1.0d));
+    expectFalse(isNaN64(Double.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0008000000000000L)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8008000000000000L)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0000000000000001L)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8000000000000001L)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN64(Double.NaN));
+    expectTrue(isNaN64(0.0d / 0.0d));
+    expectTrue(isNaN64(Math.sqrt(-1.0d)));
+    double[] dvals = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectTrue(isNaN64(dvals[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectTrue(boolean value) {
+    if (!value) {
+      throw new Error("Expected True");
+    }
+  }
+
+  private static void expectFalse(boolean value) {
+    if (value) {
+      throw new Error("Expected False");
+    }
+  }
+}
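
The instruction_simplifier rewrite these tests check (an InvokeStaticOrDirect becoming a NotEqual) corresponds to the classic self-comparison idiom: NaN is the only value that compares unequal to itself. A minimal sketch of the equivalent source-level form (illustrative only, not part of the test):

    // Semantically what the simplifier produces for Float.isNaN(x) and
    // Double.isNaN(x): a single NotEqual of the value against itself.
    static boolean isNaN32Rewritten(float x) {
      return x != x;
    }
    static boolean isNaN64Rewritten(double x) {
      return x != x;
    }
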
diff --git a/test/575-checker-string-init-alias/expected.txt b/test/575-checker-string-init-alias/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/575-checker-string-init-alias/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/575-checker-string-init-alias/info.txt b/test/575-checker-string-init-alias/info.txt
new file mode 100644
index 0000000..a91ea64
--- /dev/null
+++ b/test/575-checker-string-init-alias/info.txt
@@ -0,0 +1,2 @@
+Test for the String.<init> change and deoptimization: make
+sure the compiler knows how to handle dex aliases.
diff --git a/test/575-checker-string-init-alias/smali/TestCase.smali b/test/575-checker-string-init-alias/smali/TestCase.smali
new file mode 100644
index 0000000..ff04b27
--- /dev/null
+++ b/test/575-checker-string-init-alias/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.field public static staticField:Ljava/lang/String;
+
+## CHECK-START: void TestCase.testNoAlias(int[], java.lang.String) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testNoAlias([ILjava/lang/String;)V
+    .registers 6
+    const v1, 0
+    const v2, 1
+    new-instance v0, Ljava/lang/String;
+
+    # Will deoptimize.
+    aget v3, p0, v1
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v0, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v0, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v3, p0, v2
+
+    return-void
+.end method
+
+## CHECK-START: void TestCase.testAlias(int[], java.lang.String) register (after)
+## CHECK:         <<New:l\d+>>    NewInstance
+## CHECK:                         Deoptimize env:[[<<New>>,<<New>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testAlias([ILjava/lang/String;)V
+    .registers 7
+    const v2, 0
+    const v3, 1
+    new-instance v0, Ljava/lang/String;
+    move-object v1, v0
+
+    # Will deoptimize.
+    aget v4, p0, v2
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v1, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v1, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v4, p0, v3
+
+    return-void
+.end method
diff --git a/test/575-checker-string-init-alias/src/Main.java b/test/575-checker-string-init-alias/src/Main.java
new file mode 100644
index 0000000..1ab3207
--- /dev/null
+++ b/test/575-checker-string-init-alias/src/Main.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();
+
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    int[] array = new int[1];
+
+    {
+      Method m = c.getMethod("testNoAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array , "foo" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("foo", (String)field.get(null));
+    }
+
+    {
+      Method m = c.getMethod("testAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array, "bar" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("bar", (String)field.get(null));
+    }
+  }
+}
diff --git a/test/576-polymorphic-inlining/expected.txt b/test/576-polymorphic-inlining/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/576-polymorphic-inlining/expected.txt
diff --git a/test/576-polymorphic-inlining/info.txt b/test/576-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..b3ef0c8
--- /dev/null
+++ b/test/576-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test for polymorphic inlining.
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..d8d09af
--- /dev/null
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    for (int i = 0; i < 20000; ++i) {
+      $noinline$testVoid(new Main());
+      $noinline$testVoid(new SubMain());
+      $noinline$testVoid(new SubSubMain());
+
+      $noinline$testWithReturnValue(new Main());
+      $noinline$testWithReturnValue(new SubMain());
+      $noinline$testWithReturnValue(new SubSubMain());
+
+      $noinline$testWithBackEdge(new Main());
+      $noinline$testWithBackEdge(new SubMain());
+      $noinline$testWithBackEdge(new SubSubMain());
+    }
+  }
+
+  public static void assertIdentical(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void $noinline$testVoid(Main m) {
+    if (doThrow) throw new Error("");
+    m.willInlineVoid();
+    m.willOnlyInlineForMainVoid();
+  }
+
+  public static void $noinline$testWithReturnValue(Main m) {
+    if (doThrow) throw new Error("");
+    assertIdentical(m.getClass(), m.willInlineWithReturnValue());
+    assertIdentical(m.getClass(), m.willOnlyInlineForMainWithReturnValue());
+  }
+
+  public static void $noinline$testWithBackEdge(Main m) {
+    if (doThrow) throw new Error("");
+    for (int i = 0; i < 10; ++i) {
+      m.willInlineVoid();
+    }
+    for (int i = 0; i < 10; ++i) {
+      m.willOnlyInlineForMainVoid();
+    }
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public void willOnlyInlineForMainVoid() {
+  }
+
+  public Class willInlineWithReturnValue() {
+    return Main.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return Main.class;
+  }
+
+  public static boolean doThrow;
+}
+
+class SubMain extends Main {
+  public void willOnlyInlineForMainVoid() {
+    if (doThrow) throw new Error("");
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public Class willInlineWithReturnValue() {
+    return SubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return SubMain.class;
+  }
+}
+
+class SubSubMain extends SubMain {
+  public Class willInlineWithReturnValue() {
+    return SubSubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return SubSubMain.class;
+  }
+}
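
Conceptually, polymorphic inlining replaces the virtual dispatch with a chain of receiver-class checks built from the inline cache. A hedged sketch of the shape such a call site takes (pseudo-Java for illustration, not the actual generated code): if the cache records, say, Main and SubMain for willInlineVoid(), both receivers get inlined bodies and anything else takes the virtual-call slow path.

    // Illustrative shape of a polymorphically inlined call site.
    static void inlinedShape(Main m) {
      Class<?> k = m.getClass();
      if (k == Main.class) {
        // inlined Main.willInlineVoid() body (empty)
      } else if (k == SubMain.class) {
        // inlined SubMain.willInlineVoid() body (empty)
      } else {
        m.willInlineVoid();  // slow path: regular virtual dispatch
      }
    }
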
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index b3560b6..364be59 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -42,8 +42,7 @@
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
-    $(JACK) \
-    $(JILL_JAR)
+    $(JACK)
   TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
 endif
 
@@ -72,8 +71,8 @@
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
-	  JILL_JAR=$(abspath $(JILL_JAR)) \
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
 
@@ -962,8 +961,8 @@
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
-	    JILL_JAR=$(abspath $(JILL_JAR)) \
 	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \
 	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
diff --git a/test/run-test b/test/run-test
index faa597e..f1875d7 100755
--- a/test/run-test
+++ b/test/run-test
@@ -88,13 +88,7 @@
   export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
-# If JILL_JAR is not set, assume it is located in the prebuilts directory.
-if [ -z "$JILL_JAR" ]; then
-  export JILL_JAR="$ANDROID_BUILD_TOP/prebuilts/sdk/tools/jill.jar"
-fi
-
 export JACK="$JACK -g -cp $JACK_CLASSPATH"
-export JILL="java -jar $JILL_JAR"
 
 info="info.txt"
 build="build"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index da5225c..d9b26bc 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -78,6 +78,7 @@
 
 Release History:
  0.4 Pending
+   Annotate char[] objects with their string values.
    Show registered native allocations for heap dumps that support it.
 
  0.3 Dec 15, 2015
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 8b7f9ea..d7b64e2 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -76,11 +76,15 @@
    * If maxChars is negative, the returned string is not truncated.
    */
   public static String asString(Instance inst, int maxChars) {
-    if (!isInstanceOfClass(inst, "java.lang.String")) {
-      return null;
+    // The inst object could either be a java.lang.String or a char[]. If it
+    // is a char[], use that directly as the value, otherwise use the value
+    // field of the string object. The field accesses for count and offset
+    // later on will work okay regardless of what type the inst object is.
+    Object value = inst;
+    if (isInstanceOfClass(inst, "java.lang.String")) {
+      value = getField(inst, "value");
     }
 
-    Object value = getField(inst, "value");
     if (!(value instanceof ArrayInstance)) {
       return null;
     }
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 701d60e..d61a98d 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -35,6 +35,7 @@
   // class and reading the desired field.
   public static class DumpedStuff {
     public String basicString = "hello, world";
+    public char[] charArray = "char thing".toCharArray();
     public String nullString = null;
     public Object anObject = new Object();
     public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java
index 32f48ce..59b1c90 100644
--- a/tools/ahat/test/InstanceUtilsTest.java
+++ b/tools/ahat/test/InstanceUtilsTest.java
@@ -32,6 +32,13 @@
   }
 
   @Test
+  public void asStringCharArray() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str));
+  }
+
+  @Test
   public void asStringTruncated() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -39,6 +46,13 @@
   }
 
   @Test
+  public void asStringCharArrayTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char ", InstanceUtils.asString(str, 5));
+  }
+
+  @Test
   public void asStringExactMax() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -46,6 +60,13 @@
   }
 
   @Test
+  public void asStringCharArrayExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 10));
+  }
+
+  @Test
   public void asStringNotTruncated() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -53,6 +74,13 @@
   }
 
   @Test
+  public void asStringCharArrayNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 50));
+  }
+
+  @Test
   public void asStringNegativeMax() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -60,6 +88,13 @@
   }
 
   @Test
+  public void asStringCharArrayNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, -3));
+  }
+
+  @Test
   public void asStringNull() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("nullString");
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 44206df..e6394a9 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -272,5 +272,10 @@
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
+},
+{
+  description: "Only works with --mode=activity",
+  result: EXEC_FAILED,
+  names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index d8ef9ba..19a61dc 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -24,19 +24,6 @@
   bug: 26155567
 },
 {
-  description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
-  result: EXEC_FAILED,
-  modes: [host],
-  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushEnabled",
-          "libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
-          "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
-          "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
-          "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
-          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
-          "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
-  bug: 26507762
-},
-{
   description: "TimeoutException on hammerhead-concurrent-collector",
   result: EXEC_FAILED,
   modes: [device],
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index f29e51f..e4af9fa 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -20,9 +20,9 @@
 fi
 
 # Jar containing all the tests.
-test_jar=${OUT_DIR-out}/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
+test_jack=${OUT_DIR-out}/host/common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
 
-if [ ! -f $test_jar ]; then
+if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
        "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
   exit 1
@@ -117,6 +117,9 @@
   art_debugee="$art_debugee -verbose:jdwp"
 fi
 
+# Use Jack with "1.8" configuration.
+export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/\.jar$//'`
+
 # Run the tests using vogar.
 vogar $vm_command \
       $vm_args \
@@ -129,7 +132,8 @@
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
       --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
-      --classpath $test_jar \
+      --classpath $test_jack \
+      --toolchain jack --language JN \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test