Merge "Add Math.round tests for large integers."
diff --git a/Android.mk b/Android.mk
index 2e05d33..e762814 100644
--- a/Android.mk
+++ b/Android.mk
@@ -86,6 +86,7 @@
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/imgdiag/Android.mk
 include $(art_path)/patchoat/Android.mk
+include $(art_path)/profman/Android.mk
 include $(art_path)/dalvikvm/Android.mk
 include $(art_path)/tools/Android.mk
 include $(art_path)/tools/ahat/Android.mk
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index dc53853..02bce41 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,8 +118,7 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-# b/25928358, illegal instruction on mips64r6 with -O0
-ART_TARGET_CLANG_mips64 := false
+ART_TARGET_CLANG_mips64 :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 704d69a..22c0e8d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -144,6 +144,12 @@
   $(TARGET_CORE_IMAGE_default_no-pic_32) \
   oatdump
 
+# Profile assistant tests requires profman utility.
+ART_GTEST_profile_assistant_test_HOST_DEPS := \
+  $(HOST_OUT_EXECUTABLES)/profmand
+ART_GTEST_profile_assistant_test_TARGET_DEPS := \
+  profman
+
 # The path for which all the source files are relative, not actually the current directory.
 LOCAL_PATH := art
 
@@ -153,6 +159,7 @@
   dexlist/dexlist_test.cc \
   imgdiag/imgdiag_test.cc \
   oatdump/oatdump_test.cc \
+  profman/profile_assistant_test.cc \
   runtime/arch/arch_test.cc \
   runtime/arch/instruction_set_test.cc \
   runtime/arch/instruction_set_features_test.cc \
@@ -270,7 +277,6 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/profile_assistant_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index dc2c9c9..81b854e 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -291,6 +291,13 @@
   }
 
   {
+    const char* log_args = "-verbose:collector";
+    LogVerbosity log_verbosity = LogVerbosity();
+    log_verbosity.collector = true;
+    EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose);
+  }
+
+  {
     const char* log_args = "-verbose:oat";
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.oat = true;
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 740199d..c0a00cc 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -584,6 +584,8 @@
     for (size_t j = 0; j < verbose_options.size(); ++j) {
       if (verbose_options[j] == "class") {
         log_verbosity.class_linker = true;
+      } else if (verbose_options[j] == "collector") {
+        log_verbosity.collector = true;
       } else if (verbose_options[j] == "compiler") {
         log_verbosity.compiler = true;
       } else if (verbose_options[j] == "deopt") {
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b164942..3f61e8e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -90,7 +90,6 @@
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
-	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
@@ -108,8 +107,7 @@
 	elf_writer.cc \
 	elf_writer_quick.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	profile_assistant.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
@@ -182,6 +180,7 @@
 	linker/x86/relative_patcher_x86_base.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/intrinsics_x86.cc \
+	optimizing/pc_relative_fixups_x86.cc \
 	utils/x86/assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index a19b36f..8fd20aa 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_LOC_WRITER_H_
 
+#include <cstring>
 #include <map>
 
 #include "arch/instruction_set.h"
@@ -172,11 +173,6 @@
     return;
   }
 
-  dwarf::Writer<> debug_loc(debug_loc_buffer);
-  dwarf::Writer<> debug_ranges(debug_ranges_buffer);
-  debug_info->WriteSecOffset(dwarf::DW_AT_location, debug_loc.size());
-  debug_info->WriteSecOffset(dwarf::DW_AT_start_scope, debug_ranges.size());
-
   std::vector<VariableLocation> variable_locations = GetVariableLocations(
       method_info,
       vreg,
@@ -185,6 +181,8 @@
       dex_pc_high);
 
   // Write .debug_loc entries.
+  dwarf::Writer<> debug_loc(debug_loc_buffer);
+  const size_t debug_loc_offset = debug_loc.size();
   const bool is64bit = Is64BitInstructionSet(isa);
   std::vector<uint8_t> expr_buffer;
   for (const VariableLocation& variable_location : variable_locations) {
@@ -271,6 +269,8 @@
 
   // Write .debug_ranges entries.
   // This includes ranges where the variable is in scope but the location is not known.
+  dwarf::Writer<> debug_ranges(debug_ranges_buffer);
+  size_t debug_ranges_offset = debug_ranges.size();
   for (size_t i = 0; i < variable_locations.size(); i++) {
     uint32_t low_pc = variable_locations[i].low_pc;
     uint32_t high_pc = variable_locations[i].high_pc;
@@ -294,6 +294,23 @@
     debug_ranges.PushUint32(0);
     debug_ranges.PushUint32(0);
   }
+
+  // Simple de-duplication - check whether this entry is same as the last one (or tail of it).
+  size_t debug_ranges_entry_size = debug_ranges.size() - debug_ranges_offset;
+  if (debug_ranges_offset >= debug_ranges_entry_size) {
+    size_t previous_offset = debug_ranges_offset - debug_ranges_entry_size;
+    if (memcmp(debug_ranges_buffer->data() + previous_offset,
+               debug_ranges_buffer->data() + debug_ranges_offset,
+               debug_ranges_entry_size) == 0) {
+      // Remove what we have just written and use the last entry instead.
+      debug_ranges_buffer->resize(debug_ranges_offset);
+      debug_ranges_offset = previous_offset;
+    }
+  }
+
+  // Write attributes to .debug_info.
+  debug_info->WriteSecOffset(dwarf::DW_AT_location, debug_loc_offset);
+  debug_info->WriteSecOffset(dwarf::DW_AT_start_scope, debug_ranges_offset);
 }
 
 }  // namespace debug
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index eb4915b..6f9dd6d 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -1679,9 +1679,7 @@
       if (opcode == Instruction::NEW_INSTANCE) {
         uint32_t type_idx = mir->dalvikInsn.vB;
         if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
-          // Change NEW_INSTANCE into CONST_4 of 0
-          mir->dalvikInsn.opcode = Instruction::CONST_4;
-          mir->dalvikInsn.vB = 0;
+          LOG(FATAL) << "Quick cannot compile String allocations";
         }
       } else if ((opcode == Instruction::INVOKE_DIRECT) ||
                  (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
@@ -1689,52 +1687,13 @@
         DexFileMethodInliner* inliner =
             cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
         if (inliner->IsStringInitMethodIndex(method_idx)) {
-          bool is_range = (opcode == Instruction::INVOKE_DIRECT_RANGE);
-          uint32_t orig_this_reg = is_range ? mir->dalvikInsn.vC : mir->dalvikInsn.arg[0];
-          // Remove this pointer from string init and change to static call.
-          mir->dalvikInsn.vA--;
-          if (!is_range) {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC;
-            for (uint32_t i = 0; i < mir->dalvikInsn.vA; i++) {
-              mir->dalvikInsn.arg[i] = mir->dalvikInsn.arg[i + 1];
-            }
-          } else {
-            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC_RANGE;
-            mir->dalvikInsn.vC++;
-          }
-          // Insert a move-result instruction to the original this pointer reg.
-          MIR* move_result_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-          move_result_mir->dalvikInsn.opcode = Instruction::MOVE_RESULT_OBJECT;
-          move_result_mir->dalvikInsn.vA = orig_this_reg;
-          move_result_mir->offset = mir->offset;
-          move_result_mir->m_unit_index = mir->m_unit_index;
-          bb->InsertMIRAfter(mir, move_result_mir);
-          // Add additional moves if this pointer was copied to other registers.
-          const VerifiedMethod* verified_method =
-              cu_->compiler_driver->GetVerifiedMethod(cu_->dex_file, cu_->method_idx);
-          DCHECK(verified_method != nullptr);
-          const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-              verified_method->GetStringInitPcRegMap();
-          auto map_it = string_init_map.find(mir->offset);
-          if (map_it != string_init_map.end()) {
-            const std::set<uint32_t>& reg_set = map_it->second;
-            for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-              MIR* move_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
-              move_mir->dalvikInsn.opcode = Instruction::MOVE_OBJECT;
-              move_mir->dalvikInsn.vA = *set_it;
-              move_mir->dalvikInsn.vB = orig_this_reg;
-              move_mir->offset = mir->offset;
-              move_mir->m_unit_index = mir->m_unit_index;
-              bb->InsertMIRAfter(move_result_mir, move_mir);
-            }
-          }
+          LOG(FATAL) << "Quick cannot compile String allocations";
         }
       }
     }
   }
 }
 
-
 bool MIRGraph::EliminateSuspendChecksGate() {
   if (kLeafOptimization ||           // Incompatible (could create loops without suspend checks).
       (cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 027290f..49768de 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -509,7 +509,8 @@
 }
 
 bool QuickCompiler::CanCompileInstruction(const MIR* mir,
-                                          const DexFile& dex_file) const {
+                                          const DexFile& dex_file,
+                                          CompilationUnit* cu) const {
   switch (mir->dalvikInsn.opcode) {
     // Quick compiler won't support new instruction semantics to invoke-super into an interface
     // method
@@ -522,6 +523,13 @@
       // False if we are an interface i.e. !(java_access_flags & kAccInterface)
       return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
     }
+    case Instruction::NEW_INSTANCE: {
+      uint32_t type_idx = mir->dalvikInsn.vB;
+      if (cu->compiler_driver->IsStringTypeIndex(type_idx, cu->dex_file)) {
+        return false;
+      }
+      return true;
+    }
     default:
       return true;
   }
@@ -567,7 +575,7 @@
               << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
         }
         return false;
-      } else if (!CanCompileInstruction(mir, dex_file)) {
+      } else if (!CanCompileInstruction(mir, dex_file, cu)) {
         VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
         return false;
       }
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index 55f45f1..f32cf86 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -75,7 +75,7 @@
   explicit QuickCompiler(CompilerDriver* driver);
 
  private:
-  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file, CompilationUnit* cu) const;
 
   std::unique_ptr<PassManager> pre_opt_pass_manager_;
   std::unique_ptr<PassManager> post_opt_pass_manager_;
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 0355f11..9ae2164 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -37,20 +37,16 @@
 
 namespace art {
 
-VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types,
-                               bool has_runtime_throw,
-                               const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map)
+VerifiedMethod::VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw)
     : encountered_error_types_(encountered_error_types),
-      has_runtime_throw_(has_runtime_throw),
-      string_init_pc_reg_map_(string_init_pc_reg_map) {
+      has_runtime_throw_(has_runtime_throw) {
 }
 
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
   std::unique_ptr<VerifiedMethod> verified_method(
       new VerifiedMethod(method_verifier->GetEncounteredFailureTypes(),
-                         method_verifier->HasInstructionThatWillThrow(),
-                         method_verifier->GetStringInitPcRegMap()));
+                         method_verifier->HasInstructionThatWillThrow()));
 
   if (compile) {
     /* Generate a register map. */
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 74fcb07..12d0219 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -83,14 +83,8 @@
     return has_runtime_throw_;
   }
 
-  const SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() const {
-    return string_init_pc_reg_map_;
-  }
-
  private:
-  VerifiedMethod(uint32_t encountered_error_types,
-                 bool has_runtime_throw,
-                 const SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map);
+  VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw);
 
   /*
    * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
@@ -129,10 +123,6 @@
 
   const uint32_t encountered_error_types_;
   const bool has_runtime_throw_;
-
-  // Copy of mapping generated by verifier of dex PCs of string init invocations
-  // to the set of other registers that the receiver has been copied into.
-  const SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index bc5c6ca..510613e 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -190,7 +190,8 @@
 
 void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
   if (swap_space_.get() != nullptr) {
-    os << " swap=" << PrettySize(swap_space_->GetSize());
+    const size_t swap_size = swap_space_->GetSize();
+    os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)";
   }
   if (extended) {
     Thread* self = Thread::Current();
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 0d65bc7..3cb63e7 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -186,13 +186,7 @@
       } else {
         // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
-        std::string temp;
-        const DexFile::TypeId* type_id =
-           dex_file->FindTypeId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
-        if (type_id != nullptr) {
-          // medium path, needs check of static storage base being initialized
-          storage_idx = dex_file->GetIndexForTypeId(*type_id);
-        }
+        storage_idx = resolved_member->GetDeclaringClass()->FindTypeIndexInOtherDexFile(*dex_file);
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f078bf6..a51dd32 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -378,7 +378,6 @@
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
-  DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
 
   compiler_->Init();
@@ -2732,16 +2731,18 @@
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
-  const ArenaPool* arena_pool = runtime->GetArenaPool();
-  gc::Heap* const heap = runtime->GetHeap();
-  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
-  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+  const ArenaPool* const arena_pool = runtime->GetArenaPool();
+  const gc::Heap* const heap = runtime->GetHeap();
+  const size_t arena_alloc = arena_pool->GetBytesAllocated();
+  const size_t java_alloc = heap->GetBytesAllocated();
+  oss << "arena alloc=" << PrettySize(arena_alloc) << " (" << arena_alloc << "B)";
+  oss << " java alloc=" << PrettySize(java_alloc) << " (" << java_alloc << "B)";
 #if defined(__BIONIC__) || defined(__GLIBC__)
-  struct mallinfo info = mallinfo();
+  const struct mallinfo info = mallinfo();
   const size_t allocated_space = static_cast<size_t>(info.uordblks);
   const size_t free_space = static_cast<size_t>(info.fordblks);
-  oss << " native alloc=" << PrettySize(allocated_space) << " free="
-      << PrettySize(free_space);
+  oss << " native alloc=" << PrettySize(allocated_space) << " (" << allocated_space << "B)"
+      << " free=" << PrettySize(free_space) << " (" << free_space << "B)";
 #endif
   compiled_method_storage_.DumpMemoryUsage(oss, extended);
   return oss.str();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5e35cbb..d8f23f7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -138,6 +138,7 @@
       REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_);
 
   VerificationResults* GetVerificationResults() const {
+    DCHECK(Runtime::Current()->IsAotCompiler());
     return verification_results_;
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 73574ba..d50528e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -124,7 +124,10 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
-    ComputeLazyFieldsForImageClasses();  // Add useful information
+    if (!compile_app_image_) {
+      // Avoid for app image since this may increase RAM and image size.
+      ComputeLazyFieldsForImageClasses();  // Add useful information
+    }
   }
   heap->CollectGarbage(false);  // Remove garbage.
 
@@ -735,20 +738,20 @@
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
   bool early_exit = false;
   std::unordered_set<mirror::Class*> visited;
-  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+  return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
-bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+bool ImageWriter::PruneAppImageClassInternal(
     mirror::Class* klass,
     bool* early_exit,
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
-  if (klass == nullptr) {
+  if (klass == nullptr || IsInBootImage(klass)) {
     return false;
   }
   auto found = prune_class_memo_.find(klass);
@@ -762,7 +765,11 @@
     return false;
   }
   visited->emplace(klass);
-  bool result = IsBootClassLoaderNonImageClass(klass);
+  bool result = IsBootClassLoaderClass(klass);
+  std::string temp;
+  // Prune if not an image class, this handles any broken sets of image classes such as having a
+  // class in the set but not it's superclass.
+  result = result || !compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
   bool my_early_exit = false;  // Only for ourselves, ignore caller.
   // Remove classes that failed to verify since we don't want to have java.lang.VerifyError in the
   // app image.
@@ -775,17 +782,15 @@
     // Check interfaces since these wont be visited through VisitReferences.)
     mirror::IfTable* if_table = klass->GetIfTable();
     for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-      result = result || ContainsBootClassLoaderNonImageClassInternal(
-          if_table->GetInterface(i),
-          &my_early_exit,
-          visited);
+      result = result || PruneAppImageClassInternal(if_table->GetInterface(i),
+                                                    &my_early_exit,
+                                                    visited);
     }
   }
   if (klass->IsObjectArrayClass()) {
-    result = result || ContainsBootClassLoaderNonImageClassInternal(
-        klass->GetComponentType(),
-        &my_early_exit,
-        visited);
+    result = result || PruneAppImageClassInternal(klass->GetComponentType(),
+                                                  &my_early_exit,
+                                                  visited);
   }
   // Check static fields and their classes.
   size_t num_static_fields = klass->NumReferenceStaticFields();
@@ -798,27 +803,22 @@
       mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
       if (ref != nullptr) {
         if (ref->IsClass()) {
-          result = result ||
-                   ContainsBootClassLoaderNonImageClassInternal(
-                       ref->AsClass(),
-                       &my_early_exit,
-                       visited);
+          result = result || PruneAppImageClassInternal(ref->AsClass(),
+                                                        &my_early_exit,
+                                                        visited);
+        } else {
+          result = result || PruneAppImageClassInternal(ref->GetClass(),
+                                                        &my_early_exit,
+                                                        visited);
         }
-        result = result ||
-                 ContainsBootClassLoaderNonImageClassInternal(
-                     ref->GetClass(),
-                     &my_early_exit,
-                     visited);
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
                                   sizeof(mirror::HeapReference<mirror::Object>));
     }
   }
-  result = result ||
-           ContainsBootClassLoaderNonImageClassInternal(
-               klass->GetSuperClass(),
-               &my_early_exit,
-               visited);
+  result = result || PruneAppImageClassInternal(klass->GetSuperClass(),
+                                                &my_early_exit,
+                                                visited);
   // Erase the element we stored earlier since we are exiting the function.
   auto it = visited->find(klass);
   DCHECK(it != visited->end());
@@ -837,15 +837,21 @@
   if (klass == nullptr) {
     return false;
   }
+  if (compile_app_image_ && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    // Already in boot image, return true.
+    return true;
+  }
+  std::string temp;
+  if (!compiler_driver_.IsImageClass(klass->GetDescriptor(&temp))) {
+    return false;
+  }
   if (compile_app_image_) {
     // For app images, we need to prune boot loader classes that are not in the boot image since
     // these may have already been loaded when the app image is loaded.
     // Keep classes in the boot image space since we don't want to re-resolve these.
-    return Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass) ||
-        !ContainsBootClassLoaderNonImageClass(klass);
+    return !PruneAppImageClass(klass);
   }
-  std::string temp;
-  return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
+  return true;
 }
 
 class NonImageClassesVisitor : public ClassVisitor {
@@ -873,6 +879,7 @@
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
+  VLOG(compiler) << "Pruning " << visitor.classes_to_prune_.size() << " classes";
   for (mirror::Class* klass : visitor.classes_to_prune_) {
     std::string temp;
     const char* name = klass->GetDescriptor(&temp);
@@ -891,10 +898,10 @@
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
   ReaderMutexLock mu2(self, *class_linker->DexLock());
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-    if (dex_cache == nullptr) {
+    if (self->IsJWeakCleared(data.weak_root)) {
       continue;
     }
+    mirror::DexCache* dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != nullptr && !KeepClass(klass)) {
@@ -907,10 +914,10 @@
           mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
       DCHECK(method != nullptr) << "Expected resolution method instead of null method";
       mirror::Class* declaring_class = method->GetDeclaringClass();
-      // Miranda methods may be held live by a class which was not an image class but have a
+      // Copied methods may be held live by a class which was not an image class but have a
       // declaring class which is an image class. Set it to the resolution method to be safe and
       // prevent dangling pointers.
-      if (method->IsMiranda() || !KeepClass(declaring_class)) {
+      if (method->MightBeCopied() || !KeepClass(declaring_class)) {
         mirror::DexCache::SetElementPtrSize(resolved_methods,
                                             i,
                                             resolution_method,
@@ -1820,12 +1827,16 @@
 }
 
 template <typename T>
-T* ImageWriter::NativeLocationInImage(T* obj, const char* oat_filename) {
+T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
-    ImageInfo& image_info = GetImageInfo(oat_filename);
-    return reinterpret_cast<T*>(image_info.image_begin_ + NativeOffsetInImage(obj));
+    auto it = native_object_relocations_.find(obj);
+    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+        << Runtime::Current()->GetHeap()->DumpSpaces();
+    const NativeObjectRelocation& relocation = it->second;
+    ImageInfo& image_info = GetImageInfo(relocation.oat_filename);
+    return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
   }
 }
 
@@ -1842,33 +1853,19 @@
 
 class NativeLocationVisitor {
  public:
-  explicit NativeLocationVisitor(ImageWriter* image_writer, const char* oat_filename)
-      : image_writer_(image_writer), oat_filename_(oat_filename) {}
+  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   template <typename T>
   T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return image_writer_->NativeLocationInImage(ptr, oat_filename_);
-  }
-
-  ArtMethod* operator()(ArtMethod* method) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = method->IsRuntimeMethod() ? image_writer_->GetDefaultOatFilename() :
-        image_writer_->GetOatFilenameForDexCache(method->GetDexCache());
-    return image_writer_->NativeLocationInImage(method, oat_filename);
-  }
-
-  ArtField* operator()(ArtField* field) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    const char* oat_filename = image_writer_->GetOatFilenameForDexCache(field->GetDexCache());
-    return image_writer_->NativeLocationInImage(field, oat_filename);
+    return image_writer_->NativeLocationInImage(ptr);
   }
 
  private:
   ImageWriter* const image_writer_;
-  const char* oat_filename_;
 };
 
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  const char* oat_filename = GetOatFilename(orig);
-  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 
@@ -1952,11 +1949,10 @@
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-  const char* oat_filename = GetOatFilenameForDexCache(orig_dex_cache);
   GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
-                                               NativeLocationInImage(orig_strings, oat_filename),
+                                               NativeLocationInImage(orig_strings),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
                                  ImageAddressVisitor(this));
@@ -1964,7 +1960,7 @@
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
-                                               NativeLocationInImage(orig_types, oat_filename),
+                                               NativeLocationInImage(orig_types),
                                                /*pointer size*/8u);
     orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
                                        ImageAddressVisitor(this));
@@ -1972,32 +1968,25 @@
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
-                                               NativeLocationInImage(orig_methods, oat_filename),
+                                               NativeLocationInImage(orig_methods),
                                                /*pointer size*/8u);
     ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
-      const char* method_oat_filename;
-      if (orig == nullptr || orig->IsRuntimeMethod()) {
-        method_oat_filename = default_oat_filename_;
-      } else {
-        method_oat_filename = GetOatFilenameForDexCache(orig->GetDexCache());
-      }
-      ArtMethod* copy = NativeLocationInImage(orig, method_oat_filename);
+      // NativeLocationInImage also handles runtime methods since these have relocation info.
+      ArtMethod* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
-                                               NativeLocationInImage(orig_fields, oat_filename),
+                                               NativeLocationInImage(orig_fields),
                                                /*pointer size*/8u);
     ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      const char* field_oat_filename =
-          orig == nullptr ? default_oat_filename_ : GetOatFilenameForDexCache(orig->GetDexCache());
-      ArtField* copy = NativeLocationInImage(orig, field_oat_filename);
+      ArtField* copy = NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
@@ -2089,20 +2078,10 @@
 
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
-  const char* oat_filename;
-  if (orig->IsRuntimeMethod() || compile_app_image_) {
-    oat_filename = default_oat_filename_;
-  } else {
-    auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
-    DCHECK(it != dex_file_oat_filename_map_.end()) << orig->GetDexFile()->GetLocation();
-    oat_filename = it->second;
-  }
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
-  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods, oat_filename),
-                                   target_ptr_size_);
+  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
   GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_);
-  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types, oat_filename),
-                                 target_ptr_size_);
+  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types), target_ptr_size_);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -2324,6 +2303,8 @@
     image_info_map_.emplace(oat_filename, ImageInfo());
   }
   std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+  CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty())
+      << "Compiling a boot image should occur iff there are no boot image spaces loaded";
 }
 
 ImageWriter::ImageInfo::ImageInfo()
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 9371d9f..ee204c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -410,16 +410,18 @@
   // Return true if klass is loaded by the boot class loader but not in the boot image.
   bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Return true if klass depends on a boot class loader non image class live. We want to prune
-  // these classes since we do not want any boot class loader classes in the image. This means that
+  // Return true if klass depends on a boot class loader non image class. We want to prune these
+  // classes since we do not want any boot class loader classes in the image. This means that
   // we also cannot have any classes which refer to these boot class loader non image classes.
-  bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
+  // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
+  // driver.
+  bool PruneAppImageClass(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
-  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
-                                                    bool* early_exit,
-                                                    std::unordered_set<mirror::Class*>* visited)
+  bool PruneAppImageClassInternal(mirror::Class* klass,
+                                  bool* early_exit,
+                                  std::unordered_set<mirror::Class*>* visited)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
@@ -428,7 +430,7 @@
 
   // Location of where the object will be when the image is loaded at runtime.
   template <typename T>
-  T* NativeLocationInImage(T* obj, const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_);
+  T* NativeLocationInImage(T* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Location of where the temporary copy of the object currently is.
   template <typename T>
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3fe7861..909d682 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -23,10 +23,7 @@
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "base/unix_file/fd_file.h"
-#include "compiler_callbacks.h"
 #include "debug/elf_debug_writer.h"
-#include "dex/pass_manager.h"
-#include "dex/quick_compiler_callbacks.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "jit/debugger_interface.h"
@@ -36,7 +33,6 @@
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "thread_list.h"
-#include "verifier/method_verifier-inl.h"
 
 namespace art {
 namespace jit {
@@ -45,11 +41,10 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
+extern "C" void* jit_load(bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
-  *callbacks = jit_compiler->GetCompilerCallbacks();
   *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
@@ -151,14 +146,10 @@
     instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
   }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              CompilerCallbacks::CallbackMode::kCompileApp));
   compiler_driver_.reset(new CompilerDriver(
       compiler_options_.get(),
-      verification_results_.get(),
+      /* verification_results */ nullptr,
       method_inliner_map_.get(),
       Compiler::kOptimizing,
       instruction_set,
@@ -251,9 +242,5 @@
   return success;
 }
 
-CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
-  return callbacks_.get();
-}
-
 }  // namespace jit
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 894d29e..d3b404a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -415,7 +415,9 @@
     size_t visited_virtuals = 0;
     // TODO We should also check copied methods in this test.
     for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
-      EXPECT_FALSE(m.IsMiranda());
+      if (!klass->IsInterface()) {
+        EXPECT_FALSE(m.MightBeCopied());
+      }
       CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
       ++method_index;
       ++visited_virtuals;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 3eda863..c500ea4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2107,7 +2107,6 @@
   LocationSummary* locations = instruction->GetLocations();
   Register res = locations->Out().AsRegister<Register>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
-  bool gt_bias = instruction->IsGtBias();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   //  0 if: left == right
@@ -2141,6 +2140,7 @@
     }
 
     case Primitive::kPrimFloat: {
+      bool gt_bias = instruction->IsGtBias();
       FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
       FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
       MipsLabel done;
@@ -2180,6 +2180,7 @@
       break;
     }
     case Primitive::kPrimDouble: {
+      bool gt_bias = instruction->IsGtBias();
       FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
       FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
       MipsLabel done;
@@ -3953,28 +3954,19 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
-    return;
-  }
-
+void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  Register temp = temp_location.AsRegister<Register>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
 
   // temp = object->GetClass();
-  if (receiver.IsStackSlot()) {
-    __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
-    __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
-  } else {
-    DCHECK(receiver.IsRegister());
-    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
-  }
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  DCHECK(receiver.IsRegister());
+  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3982,6 +3974,14 @@
   // T9();
   __ Jalr(T9);
   __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 12964b0..dd0641c 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -353,10 +353,7 @@
       MethodReference target_method) OVERRIDE;
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 119084e..e3a44f1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1727,7 +1727,6 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister res = locations->Out().AsRegister<GpuRegister>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
-  bool gt_bias = instruction->IsGtBias();
 
   //  0 if: left == right
   //  1 if: left  > right
@@ -1769,7 +1768,7 @@
       __ CmpEqS(FTMP, lhs, rhs);
       __ LoadConst32(res, 0);
       __ Bc1nez(FTMP, &done);
-      if (gt_bias) {
+      if (instruction->IsGtBias()) {
         __ CmpLtS(FTMP, lhs, rhs);
         __ LoadConst32(res, -1);
         __ Bc1nez(FTMP, &done);
@@ -1791,7 +1790,7 @@
       __ CmpEqD(FTMP, lhs, rhs);
       __ LoadConst32(res, 0);
       __ Bc1nez(FTMP, &done);
-      if (gt_bias) {
+      if (instruction->IsGtBias()) {
         __ CmpLtD(FTMP, lhs, rhs);
         __ LoadConst32(res, -1);
         __ Bc1nez(FTMP, &done);
@@ -4258,4 +4257,3 @@
 
 }  // namespace mips64
 }  // namespace art
-
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 1161253..eb7315a 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -349,7 +349,7 @@
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
+    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64";
   }
 
  private:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c880c2..f032f51 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,7 +26,6 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
-#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -1518,30 +1517,131 @@
                                /* false_target */ nullptr);
 }
 
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // A FP condition doesn't generate the single CC that we need.
+  // In 32 bit mode, a long condition doesn't generate a single CC either.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition()) {
+    Primitive::Type compare_type = condition->InputAt(0)->GetType();
+    if (compare_type == Primitive::kPrimLong ||
+        Primitive::IsFloatingPointType(compare_type)) {
+      return false;
+    }
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  Primitive::Type select_type = select->GetType();
-  HInstruction* cond = select->GetCondition();
-
-  if (Primitive::IsFloatingPointType(select_type)) {
+  if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        // Cmov can't handle a constant value.
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
   }
-  locations->SetInAt(1, Location::Any());
-  if (IsBooleanValueOrMaterializedCondition(cond)) {
-    locations->SetInAt(2, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
 
+void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+  Register lhs_reg = lhs.AsRegister<Register>();
+  if (rhs.IsConstant()) {
+    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+    codegen_->Compare32BitValue(lhs_reg, value);
+  } else if (rhs.IsStackSlot()) {
+    __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+  } else {
+    __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+  }
+}
+
 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(
-      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction. Condition codes are right.
+          cond = X86Condition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          Register cond_reg = locations->InAt(2).AsRegister<Register>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        // We can't handle FP or long here.
+        DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+        DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+        LocationSummary* cond_locations = condition->GetLocations();
+        GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        cond = X86Condition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      Register cond_reg = locations->InAt(2).AsRegister<Register>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    Location false_loc = locations->InAt(0);
+    Location true_loc = locations->InAt(1);
+    if (select->GetType() == Primitive::kPrimLong) {
+      // 64 bit conditional move.
+      Register false_high = false_loc.AsRegisterPairHigh<Register>();
+      Register false_low = false_loc.AsRegisterPairLow<Register>();
+      if (true_loc.IsRegisterPair()) {
+        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
+        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
+      } else {
+        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
+        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
+      }
+    } else {
+      // 32 bit conditional move.
+      Register false_reg = false_loc.AsRegister<Register>();
+      if (true_loc.IsRegister()) {
+        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
+      } else {
+        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
+      }
+    }
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(
+        select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1655,15 +1755,7 @@
 
       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-      }
+      GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -4141,15 +4233,7 @@
 
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimInt: {
-      Register left_reg = left.AsRegister<Register>();
-      if (right.IsConstant()) {
-        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
-        codegen_->Compare32BitValue(left_reg, value);
-      } else if (right.IsStackSlot()) {
-        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
-      } else {
-        __ cmpl(left_reg, right.AsRegister<Register>());
-      }
+      GenerateIntCompare(left, right);
       break;
     }
     case Primitive::kPrimLong: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2fb6d60..63e9b2f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -297,6 +297,7 @@
                                    HBasicBlock* default_block);
 
   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+  void GenerateIntCompare(Location lhs, Location rhs);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 57452cc..7ddabde 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,8 +18,28 @@
 
 namespace art {
 
-// This visitor tries to simplify operations that yield a constant. For example
-// `input * 0` is replaced by a null constant.
+// This visitor tries to simplify instructions that can be evaluated
+// as constants.
+class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+ public:
+  explicit HConstantFoldingVisitor(HGraph* graph)
+      : HGraphDelegateVisitor(graph) {}
+
+ private:
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+  void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE;
+  void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE;
+
+  void VisitTypeConversion(HTypeConversion* inst) OVERRIDE;
+  void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
+};
+
+// This visitor tries to simplify operations with an absorbing input,
+// yielding a constant. For example `input * 0` is replaced by a
+// null constant.
 class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
  public:
   explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
@@ -44,59 +64,69 @@
   void VisitXor(HXor* instruction) OVERRIDE;
 };
 
+
 void HConstantFolding::Run() {
-  InstructionWithAbsorbingInputSimplifier simplifier(graph_);
+  HConstantFoldingVisitor visitor(graph_);
   // Process basic blocks in reverse post-order in the dominator tree,
   // so that an instruction turned into a constant, used as input of
   // another instruction, may possibly be used to turn that second
   // instruction into a constant as well.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Traverse this block's instructions in (forward) order and
-    // replace the ones that can be statically evaluated by a
-    // compile-time counterpart.
-    for (HInstructionIterator inst_it(block->GetInstructions());
-         !inst_it.Done(); inst_it.Advance()) {
-      HInstruction* inst = inst_it.Current();
-      if (inst->IsBinaryOperation()) {
-        // Constant folding: replace `op(a, b)' with a constant at
-        // compile time if `a' and `b' are both constants.
-        HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        } else {
-          inst->Accept(&simplifier);
-        }
-      } else if (inst->IsUnaryOperation()) {
-        // Constant folding: replace `op(a)' with a constant at compile
-        // time if `a' is a constant.
-        HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsTypeConversion()) {
-        // Constant folding: replace `TypeConversion(a)' with a constant at
-        // compile time if `a' is a constant.
-        HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
-        if (constant != nullptr) {
-          inst->ReplaceWith(constant);
-          inst->GetBlock()->RemoveInstruction(inst);
-        }
-      } else if (inst->IsDivZeroCheck()) {
-        // We can safely remove the check if the input is a non-null constant.
-        HDivZeroCheck* check = inst->AsDivZeroCheck();
-        HInstruction* check_input = check->InputAt(0);
-        if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
-          check->ReplaceWith(check_input);
-          check->GetBlock()->RemoveInstruction(check);
-        }
-      }
-    }
+  visitor.VisitReversePostOrder();
+}
+
+
+void HConstantFoldingVisitor::VisitBasicBlock(HBasicBlock* block) {
+  // Traverse this block's instructions (phis don't need to be
+  // processed) in (forward) order and replace the ones that can be
+  // statically evaluated by a compile-time counterpart.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    it.Current()->Accept(this);
   }
 }
 
+void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
+  // Constant folding: replace `op(a)' with a constant at compile
+  // time if `a' is a constant.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
+  // Constant folding: replace `op(a, b)' with a constant at
+  // compile time if `a' and `b' are both constants.
+  HConstant* constant = inst->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  } else {
+    InstructionWithAbsorbingInputSimplifier simplifier(GetGraph());
+    inst->Accept(&simplifier);
+  }
+}
+
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+  // Constant folding: replace `TypeConversion(a)' with a constant at
+  // compile time if `a' is a constant.
+  HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
+  if (constant != nullptr) {
+    inst->ReplaceWith(constant);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
+  // We can safely remove the check if the input is a non-null constant.
+  HInstruction* check_input = inst->InputAt(0);
+  if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
+    inst->ReplaceWith(check_input);
+    inst->GetBlock()->RemoveInstruction(inst);
+  }
+}
+
+
 void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HInstruction* left = instruction->GetLeft();
@@ -178,7 +208,7 @@
         ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
          (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
       // Replace code looking like
-      //    CMP{G,L} dst, src, NaN
+      //    CMP{G,L}-{FLOAT,DOUBLE} dst, src, NaN
       // with
       //    CONSTANT +1 (gt bias)
       // or
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 2698b2d..e10b1d6 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -26,13 +26,20 @@
  * Optimization pass performing a simple constant-expression
  * evaluation on the SSA form.
  *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
+ *
  * This class is named art::HConstantFolding to avoid name
  * clashes with the art::ConstantPropagation class defined in
  * compiler/dex/post_opt_passes.h.
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
       : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index e6e9177..4a49c83 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -593,8 +593,9 @@
       HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
-            "Loop header %d has multiple incoming (non back edge) blocks.",
-            id));
+            "Loop header %d has multiple incoming (non back edge) blocks: %d.",
+            id,
+            predecessor->GetBlockId()));
       }
     }
   }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4cf0eb1..c0263e4 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -384,6 +384,13 @@
         << array_set->GetValueCanBeNull() << std::noboolalpha;
   }
 
+  void VisitCompare(HCompare* compare) OVERRIDE {
+    ComparisonBias bias = compare->GetBias();
+    StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias
+                                     ? "gt"
+                                     : (bias == ComparisonBias::kLtBias ? "lt" : "none"));
+  }
+
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
     StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
     StartAttributeStream("method_name") << PrettyMethod(
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a5acab8..02a1acc 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -190,28 +190,34 @@
   }
 }
 
-static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+static uint32_t FindClassIndexIn(mirror::Class* cls,
+                                 const DexFile& dex_file,
+                                 Handle<mirror::DexCache> dex_cache)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t index = DexFile::kDexNoIndex;
   if (cls->GetDexCache() == nullptr) {
-    DCHECK(cls->IsArrayClass());
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
+    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
   } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
     // TODO: deal with proxy classes.
-    return DexFile::kDexNoIndex;
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    index = cls->GetDexTypeIndex();
+  } else {
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+  }
+
+  if (index != DexFile::kDexNoIndex) {
     // Update the dex cache to ensure the class is in. The generated code will
     // consider it is. We make it safe by updating the dex cache, as other
     // dex files might also load the class, and there is no guarantee the dex
     // cache of the dex file of the class will be updated.
-    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
-      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    if (dex_cache->GetResolvedType(index) == nullptr) {
+      dex_cache->SetResolvedType(index, cls);
     }
-    return cls->GetDexTypeIndex();
-  } else {
-    // TODO: find the class in `dex_file`.
-    return DexFile::kDexNoIndex;
   }
+
+  return index;
 }
 
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -303,7 +309,7 @@
                                                    uint32_t dex_pc) const {
   ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
   DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
-  return new (graph_->GetArena()) HInstanceFieldGet(
+  HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
       receiver,
       Primitive::kPrimNot,
       field->GetOffset(),
@@ -313,6 +319,9 @@
       *field->GetDexFile(),
       handles_->NewHandle(field->GetDexCache()),
       dex_pc);
+  // The class of a field is effectively final, and does not have any memory dependencies.
+  result->SetSideEffects(SideEffects::None());
+  return result;
 }
 
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
@@ -322,7 +331,8 @@
       << invoke_instruction->DebugName();
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  uint32_t class_index = FindClassIndexIn(
+      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
   if (class_index == DexFile::kDexNoIndex) {
     VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because its class is not"
@@ -350,32 +360,15 @@
   }
 
   // We successfully inlined, now add a guard.
-  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
-      class_linker, receiver, invoke_instruction->GetDexPc());
-
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
-  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
-                                                               class_index,
-                                                               caller_dex_file,
-                                                               is_referrer,
-                                                               invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true);
-
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-      compare, invoke_instruction->GetDexPc());
-  // TODO: Extend reference type propagation to understand the guard.
-  if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
-  } else {
-    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
-  }
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
-  bb_cursor->InsertInstructionAfter(compare, load_class);
-  bb_cursor->InsertInstructionAfter(deoptimize, compare);
-  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  AddTypeGuard(receiver,
+               cursor,
+               bb_cursor,
+               class_index,
+               is_referrer,
+               invoke_instruction,
+               /* with_deoptimization */ true);
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
@@ -386,11 +379,219 @@
   return true;
 }
 
+HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
+                                     HInstruction* cursor,
+                                     HBasicBlock* bb_cursor,
+                                     uint32_t class_index,
+                                     bool is_referrer,
+                                     HInstruction* invoke_instruction,
+                                     bool with_deoptimization) {
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
+  if (with_deoptimization) {
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
+  return compare;
+}
+
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
+
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+    return true;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  bool all_targets_inlined = true;
+  bool one_target_inlined = false;
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+
+    HInstruction* receiver = invoke_instruction->InputAt(0);
+    HInstruction* cursor = invoke_instruction->GetPrevious();
+    HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+    uint32_t class_index = FindClassIndexIn(
+        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    HInstruction* return_replacement = nullptr;
+    if (class_index == DexFile::kDexNoIndex ||
+        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+      all_targets_inlined = false;
+    } else {
+      one_target_inlined = true;
+      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+
+      // If we have inlined all targets before, and this receiver is the last seen,
+      // we deoptimize instead of keeping the original invoke instruction.
+      bool deoptimize = all_targets_inlined &&
+          (i != InlineCache::kIndividualCacheSize - 1) &&
+          (ic.GetTypeAt(i + 1) == nullptr);
+      HInstruction* compare = AddTypeGuard(
+          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      if (deoptimize) {
+        if (return_replacement != nullptr) {
+          invoke_instruction->ReplaceWith(return_replacement);
+        }
+        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+        // Because the inline cache data can be populated concurrently, we force the end of the
+        // iteration. Otherhwise, we could see a new receiver type.
+        break;
+      } else {
+        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+      }
+    }
+  }
+
+  if (!one_target_inlined) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because none"
+                   << " of its targets could be inlined";
+    return false;
+  }
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  // Run type propagation to get the guards typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  rtp_fixup.Run();
+  return true;
+}
+
+void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                        HInstruction* return_replacement,
+                                                        HInstruction* invoke_instruction) {
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HBasicBlock* cursor_block = compare->GetBlock();
+  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
+  ArenaAllocator* allocator = graph_->GetArena();
+
+  // Spit the block after the compare: `cursor_block` will now be the start of the diamond,
+  // and the returned block is the start of the then branch (that could contain multiple blocks).
+  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
+
+  // Split the block containing the invoke before and after the invoke. The returned block
+  // of the split before will contain the invoke and will be the otherwise branch of
+  // the diamond. The returned block of the split after will be the merge block
+  // of the diamond.
+  HBasicBlock* end_then = invoke_instruction->GetBlock();
+  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
+  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
+
+  // If the methods we are inlining return a value, we create a phi in the merge block
+  // that will have the `invoke_instruction and the `return_replacement` as inputs.
+  if (return_replacement != nullptr) {
+    HPhi* phi = new (allocator) HPhi(
+        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
+    merge->AddPhi(phi);
+    invoke_instruction->ReplaceWith(phi);
+    phi->AddInput(return_replacement);
+    phi->AddInput(invoke_instruction);
+  }
+
+  // Add the control flow instructions.
+  otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
+  end_then->AddInstruction(new (allocator) HGoto(dex_pc));
+  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
+
+  // Add the newly created blocks to the graph.
+  graph_->AddBlock(then);
+  graph_->AddBlock(otherwise);
+  graph_->AddBlock(merge);
+
+  // Set up successor (and implictly predecessor) relations.
+  cursor_block->AddSuccessor(otherwise);
+  cursor_block->AddSuccessor(then);
+  end_then->AddSuccessor(merge);
+  otherwise->AddSuccessor(merge);
+
+  // Set up dominance information.
+  then->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(then);
+  otherwise->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(otherwise);
+  merge->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(merge);
+
+  // Update the revert post order.
+  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
+  MakeRoomFor(&graph_->reverse_post_order_, 1, index);
+  graph_->reverse_post_order_[++index] = then;
+  index = IndexOfElement(graph_->reverse_post_order_, end_then);
+  MakeRoomFor(&graph_->reverse_post_order_, 2, index);
+  graph_->reverse_post_order_[++index] = otherwise;
+  graph_->reverse_post_order_[++index] = merge;
+
+  // Set the loop information of the newly created blocks.
+  HLoopInformation* loop_info = cursor_block->GetLoopInformation();
+  if (loop_info != nullptr) {
+    then->SetLoopInformation(cursor_block->GetLoopInformation());
+    merge->SetLoopInformation(cursor_block->GetLoopInformation());
+    otherwise->SetLoopInformation(cursor_block->GetLoopInformation());
+    for (HLoopInformationOutwardIterator loop_it(*cursor_block);
+         !loop_it.Done();
+         loop_it.Advance()) {
+      loop_it.Current()->Add(then);
+      loop_it.Current()->Add(merge);
+      loop_it.Current()->Add(otherwise);
+    }
+    // In case the original invoke location was a back edge, we need to update
+    // the loop to now have the merge block as a back edge.
+    if (loop_info->IsBackEdge(*original_invoke_block)) {
+      loop_info->RemoveBackEdge(original_invoke_block);
+      loop_info->AddBackEdge(merge);
+    }
+  }
+
+  // Set the try/catch information of the newly created blocks.
+  then->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+}
+
+bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                                    ArtMethod* resolved_method,
+                                                    const InlineCache& ic) {
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
   if (graph_->GetInstructionSet() == kMips64) {
@@ -557,8 +758,9 @@
 
   if (!method->GetDeclaringClass()->IsVerified()) {
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+    if (Runtime::Current()->UseJit() ||
+        !compiler_driver_->IsMethodVerifiedWithoutFailures(
+            method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -781,16 +983,16 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
   DexCompilationUnit dex_compilation_unit(
-    nullptr,
-    caller_compilation_unit_.GetClassLoader(),
-    class_linker,
-    callee_dex_file,
-    code_item,
-    resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    method_index,
-    resolved_method->GetAccessFlags(),
-    compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index),
-    dex_cache);
+      nullptr,
+      caller_compilation_unit_.GetClassLoader(),
+      class_linker,
+      callee_dex_file,
+      code_item,
+      resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
+      method_index,
+      resolved_method->GetAccessFlags(),
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_ctor_barrier = false;
 
@@ -883,7 +1085,7 @@
   HConstantFolding fold(callee_graph);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 9dd9bf5..cdb2167 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -101,12 +101,18 @@
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  // Try to inline targets of a polymorphic call.
   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                 ArtMethod* resolved_method,
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                            ArtMethod* resolved_method,
+                                            const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
                                            uint32_t dex_pc) const
@@ -118,6 +124,57 @@
                                 bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Add a type guard on the given `receiver`. This will add to the graph:
+  // i0 = HFieldGet(receiver, klass)
+  // i1 = HLoadClass(class_index, is_referrer)
+  // i2 = HNotEqual(i0, i1)
+  //
+  // And if `with_deoptimization` is true:
+  // HDeoptimize(i2)
+  //
+  // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
+  HInstruction* AddTypeGuard(HInstruction* receiver,
+                             HInstruction* cursor,
+                             HBasicBlock* bb_cursor,
+                             uint32_t class_index,
+                             bool is_referrer,
+                             HInstruction* invoke_instruction,
+                             bool with_deoptimization)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  /*
+   * Ad-hoc implementation for implementing a diamond pattern in the graph for
+   * polymorphic inlining:
+   * 1) `compare` becomes the input of the new `HIf`.
+   * 2) Everything up until `invoke_instruction` is in the then branch (could
+   *    contain multiple blocks).
+   * 3) `invoke_instruction` is moved to the otherwise block.
+   * 4) If `return_replacement` is not null, the merge block will have
+   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
+   *
+   * Before:
+   *             Block1
+   *             compare
+   *              ...
+   *         invoke_instruction
+   *
+   * After:
+   *            Block1
+   *            compare
+   *              if
+   *          /        \
+   *         /          \
+   *   Then block    Otherwise block
+   *      ...       invoke_instruction
+   *       \              /
+   *        \            /
+   *          Merge block
+   *  phi(return_replacement, invoke_instruction)
+   */
+  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                HInstruction* return_replacement,
+                                                HInstruction* invoke_instruction);
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a48d06f..13d3f75 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -92,6 +92,7 @@
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
   void SimplifyCompare(HInvoke* invoke, bool has_zero_op);
+  void SimplifyIsNaN(HInvoke* invoke);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -1551,6 +1552,16 @@
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
 }
 
+void InstructionSimplifierVisitor::SimplifyIsNaN(HInvoke* invoke) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  // IsNaN(x) is the same as x != x.
+  HInstruction* x = invoke->InputAt(0);
+  HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc);
+  condition->SetBias(ComparisonBias::kLtBias);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, condition);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
   if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
     SimplifyStringEquals(instruction);
@@ -1568,6 +1579,9 @@
   } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum ||
              instruction->GetIntrinsic() == Intrinsics::kLongSignum) {
     SimplifyCompare(instruction, /* is_signum */ true);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kFloatIsNaN ||
+             instruction->GetIntrinsic() == Intrinsics::kDoubleIsNaN) {
+    SimplifyIsNaN(instruction);
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index cc4b6f6..7905104 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -25,6 +25,13 @@
 
 /**
  * Implements optimizations specific to each instruction.
+ *
+ * Note that graph simplifications producing a constant should be
+ * implemented in art::HConstantFolding, while graph simplifications
+ * not producing constants should be implemented in
+ * art::InstructionSimplifier.  (This convention is a choice that was
+ * made during the development of these parts of the compiler and is
+ * not bound by any technical requirement.)
  */
 class InstructionSimplifier : public HOptimization {
  public:
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index db39bc8..316e86b 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -570,6 +570,7 @@
                                    NeedsEnvironmentOrCache(intrinsic),
                                    GetSideEffects(intrinsic),
                                    GetExceptions(intrinsic));
+              MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
             }
           }
         }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3bf3f7f..2ab50bb 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -33,8 +33,8 @@
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
-      : HOptimization(graph, kIntrinsicsRecognizerPassName),
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
         driver_(driver) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 00a158b..ea8669f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1858,8 +1858,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1867,6 +1865,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4140d94..8741fd2 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1618,8 +1618,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1627,6 +1625,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 2294713..c862964 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -113,11 +113,10 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(A0));
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
-      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
-      UNREACHABLE();
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
     }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
@@ -825,6 +824,220 @@
              GetAssembler());
 }
 
+// byte libcore.io.Memory.peekByte(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  __ Lb(out, adr, 0);
+}
+
+// short libcore.io.Memory.peekShort(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lh(out, adr, 0);
+  } else if (IsR2OrNewer()) {
+    // Unlike for words, there are no lhl/lhr instructions to load
+    // unaligned halfwords so the code loads individual bytes, in case
+    // the address isn't halfword-aligned, and assembles them into a
+    // signed halfword.
+    __ Lb(AT, adr, 1);   // This byte must be sign-extended.
+    __ Lb(out, adr, 0);  // This byte can be either sign-extended, or
+                         // zero-extended because the following
+                         // instruction overwrites the sign bits.
+    __ Ins(out, AT, 8, 24);
+  } else {
+    __ Lbu(AT, adr, 0);  // This byte must be zero-extended.  If it's not
+                         // the "or" instruction below will destroy the upper
+                         // 24 bits of the final result.
+    __ Lb(out, adr, 1);  // This byte must be sign-extended.
+    __ Sll(out, out, 8);
+    __ Or(out, out, AT);
+  }
+}
+
+// int libcore.io.Memory.peekInt(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Lw(out, adr, 0);
+  } else {
+    __ Lwr(out, adr, 0);
+    __ Lwl(out, adr, 3);
+  }
+}
+
+// long libcore.io.Memory.peekLong(long address)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register out_lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
+  Register out_hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Lw(out_lo, adr, 0);
+    __ Lw(out_hi, adr, 4);
+  } else {
+    __ Lwr(out_lo, adr, 0);
+    __ Lwl(out_lo, adr, 3);
+    __ Lwr(out_hi, adr, 4);
+    __ Lwl(out_hi, adr, 7);
+  }
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// void libcore.io.Memory.pokeByte(long address, byte value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  __ Sb(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeShort(long address, short value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sh(val, adr, 0);
+  } else {
+    // Unlike for words, there are no shl/shr instructions to store
+    // unaligned halfwords so the code stores individual bytes, in case
+    // the address isn't halfword-aligned.
+    __ Sb(val, adr, 0);
+    __ Srl(AT, val, 8);
+    __ Sb(AT, adr, 1);
+  }
+}
+
+// void libcore.io.Memory.pokeInt(long address, int value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>();
+
+  if (IsR6()) {
+    __ Sw(val, adr, 0);
+  } else {
+    __ Swr(val, adr, 0);
+    __ Swl(val, adr, 3);
+  }
+}
+
+// void libcore.io.Memory.pokeLong(long address, long value)
+void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+  Register val_lo = invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>();
+  Register val_hi = invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>();
+
+  if (IsR6()) {
+    __ Sw(val_lo, adr, 0);
+    __ Sw(val_hi, adr, 4);
+  } else {
+    __ Swr(val_lo, adr, 0);
+    __ Swl(val_lo, adr, 3);
+    __ Swr(val_hi, adr, 4);
+    __ Swl(val_hi, adr, 7);
+  }
+}
+
+// char java.lang.String.charAt(int index)
+void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register idx = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  // TODO: Maybe we can support range check elimination. Overall,
+  //       though, I think it's not worth the cost.
+  // TODO: For simplicity, the index parameter is requested in a
+  //       register, so different from Quick we will not optimize the
+  //       code for constants (which would save a register).
+
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load the string size
+  __ Lw(TMP, obj, count_offset);
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Revert to slow path if idx is too large, or negative
+  __ Bgeu(idx, TMP, slow_path->GetEntryLabel());
+
+  // out = obj[2*idx].
+  __ Sll(TMP, idx, 1);                  // idx * 2
+  __ Addu(TMP, TMP, obj);               // Address of char at location idx
+  __ Lhu(out, TMP, value_offset);       // Load char at location idx
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // boolean java.lang.String.equals(Object anObject)
 void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -956,14 +1169,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRint)
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekByte)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekIntNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekLongNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPeekShortNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeByte)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeIntNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeLongNative)
-UNIMPLEMENTED_INTRINSIC(MemoryPokeShortNative)
 UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
 UNIMPLEMENTED_INTRINSIC(UnsafeGet)
 UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
@@ -983,7 +1188,6 @@
 UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
 UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
-UNIMPLEMENTED_INTRINSIC(StringCharAt)
 UNIMPLEMENTED_INTRINSIC(StringCompareTo)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
@@ -1016,8 +1220,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1025,6 +1227,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerCompare)
 UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index ac28503..cf3a365 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1764,8 +1764,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -1773,6 +1771,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerCompare)
 UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 22cefe8..260a877 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2635,8 +2635,6 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
@@ -2644,6 +2642,8 @@
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c9a4344..93e8c00 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2717,10 +2717,10 @@
 
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
-UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
 // Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
+UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a6b4078..33bb2e8 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -141,6 +141,7 @@
             DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
+          MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
         } else if (instruction->CanThrow()) {
           // If `instruction` can throw, we cannot move further instructions
           // that can throw as well.
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 0b5a0f1..bf56f53 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -26,8 +26,9 @@
 
 class LICM : public HOptimization {
  public:
-  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects)
-      : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {}
+  LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats),
+        side_effects_(side_effects) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 9fb32f4..d446539 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -79,7 +79,7 @@
     graph_->BuildDominatorTree();
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM(graph_, side_effects).Run();
+    LICM(graph_, side_effects, nullptr).Run();
   }
 
   // General building fields.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 27015c0..f36dc6e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,6 +15,8 @@
  */
 #include "nodes.h"
 
+#include <cfloat>
+
 #include "code_generator.h"
 #include "common_dominator.h"
 #include "ssa_builder.h"
@@ -27,6 +29,12 @@
 
 namespace art {
 
+// Enable floating-point static evaluation during constant folding
+// only if all floating-point operations and constants evaluate in the
+// range and precision of the type used (i.e., 32-bit float, 64-bit
+// double).
+static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0);
+
 void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
   ScopedObjectAccess soa(Thread::Current());
   // Create the inexact Object reference type and store it in the HGraph.
@@ -1159,6 +1167,12 @@
     return Evaluate(GetInput()->AsIntConstant());
   } else if (GetInput()->IsLongConstant()) {
     return Evaluate(GetInput()->AsLongConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetInput()->IsFloatConstant()) {
+      return Evaluate(GetInput()->AsFloatConstant());
+    } else if (GetInput()->IsDoubleConstant()) {
+      return Evaluate(GetInput()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
@@ -1178,6 +1192,12 @@
     }
   } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
     return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
+  } else if (kEnableFloatingPointStaticEvaluation) {
+    if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) {
+      return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant());
+    } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) {
+      return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant());
+    }
   }
   return nullptr;
 }
@@ -1205,6 +1225,20 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
+  switch (rhs) {
+    case ComparisonBias::kNoBias:
+      return os << "no_bias";
+    case ComparisonBias::kGtBias:
+      return os << "gt_bias";
+    case ComparisonBias::kLtBias:
+      return os << "lt_bias";
+    default:
+      LOG(FATAL) << "Unknown ComparisonBias: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
@@ -1386,7 +1420,38 @@
   }
 }
 
-HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
+HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
+  DCHECK_EQ(cursor->GetBlock(), this);
+
+  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
+                                                                    cursor->GetDexPc());
+  new_block->instructions_.first_instruction_ = cursor;
+  new_block->instructions_.last_instruction_ = instructions_.last_instruction_;
+  instructions_.last_instruction_ = cursor->previous_;
+  if (cursor->previous_ == nullptr) {
+    instructions_.first_instruction_ = nullptr;
+  } else {
+    cursor->previous_->next_ = nullptr;
+    cursor->previous_ = nullptr;
+  }
+
+  new_block->instructions_.SetBlockOfInstructions(new_block);
+
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
+  }
+  successors_.clear();
+
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
+    dominated->dominator_ = new_block;
+    new_block->dominated_blocks_.push_back(dominated);
+  }
+  dominated_blocks_.clear();
+  return new_block;
+}
+
+HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1539,6 +1604,20 @@
   }
 }
 
+void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) {
+  DCHECK(Contains(cursor));
+  if (!instruction_list.IsEmpty()) {
+    if (cursor == first_instruction_) {
+      first_instruction_ = instruction_list.first_instruction_;
+    } else {
+      cursor->previous_->next_ = instruction_list.first_instruction_;
+    }
+    instruction_list.last_instruction_->next_ = cursor;
+    instruction_list.first_instruction_->previous_ = cursor->previous_;
+    cursor->previous_ = instruction_list.last_instruction_;
+  }
+}
+
 void HInstructionList::Add(const HInstructionList& instruction_list) {
   if (IsEmpty()) {
     first_instruction_ = instruction_list.first_instruction_;
@@ -1781,18 +1860,6 @@
   graph_ = nullptr;
 }
 
-// Create space in `blocks` for adding `number_of_new_blocks` entries
-// starting at location `at`. Blocks after `at` are moved accordingly.
-static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
-                        size_t number_of_new_blocks,
-                        size_t after) {
-  DCHECK_LT(after, blocks->size());
-  size_t old_size = blocks->size();
-  size_t new_size = old_size + number_of_new_blocks;
-  blocks->resize(new_size);
-  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
-}
-
 void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
   DCHECK(block->GetSuccessors().empty());
@@ -1846,7 +1913,8 @@
     DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
-    invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
+    // Note that we add instructions before the invoke only to simplify polymorphic inlining.
+    invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions());
     body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock());
 
     // Replace the invoke with the return value of the inlined graph.
@@ -1864,7 +1932,8 @@
     // with the second half.
     ArenaAllocator* allocator = outer_graph->GetArena();
     HBasicBlock* at = invoke->GetBlock();
-    HBasicBlock* to = at->SplitAfter(invoke);
+    // Note that we split before the invoke only to simplify polymorphic inlining.
+    HBasicBlock* to = at->SplitBeforeForInlining(invoke);
 
     HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 854854f..399afab 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -131,6 +131,7 @@
   void SetBlockOfInstructions(HBasicBlock* block) const;
 
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
+  void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
   // Return the number of instructions in the list. This is an expensive operation.
@@ -618,6 +619,7 @@
 
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
+  friend class HInliner;             // For the reverse post order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -972,12 +974,15 @@
   // loop and try/catch information.
   HBasicBlock* SplitBefore(HInstruction* cursor);
 
-  // Split the block into two blocks just after `cursor`. Returns the newly
+  // Split the block into two blocks just before `cursor`. Returns the newly
   // created block. Note that this method just updates raw block information,
   // like predecessors, successors, dominators, and instruction list. It does not
   // update the graph, reverse post order, loop information, nor make sure the
   // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitAfter(HInstruction* cursor);
+  HBasicBlock* SplitBeforeForInlining(HInstruction* cursor);
+
+  // Similar to `SplitBeforeForInlining` but does it after `cursor`.
+  HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
   // Split catch block into two blocks after the original move-exception bytecode
   // instruction, or at the beginning if not present. Returns the newly created,
@@ -2063,6 +2068,7 @@
   }
 
   SideEffects GetSideEffects() const { return side_effects_; }
+  void SetSideEffects(SideEffects other) { side_effects_ = other; }
   void AddSideEffects(SideEffects other) { side_effects_.Add(other); }
 
   size_t GetLifetimePosition() const { return lifetime_position_; }
@@ -2101,7 +2107,6 @@
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
-  void SetSideEffects(SideEffects other) { side_effects_ = other; }
 
  private:
   void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
@@ -2394,7 +2399,7 @@
   }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsIntConstant());
+    DCHECK(other->IsIntConstant()) << other->DebugName();
     return other->AsIntConstant()->value_ == value_;
   }
 
@@ -2427,7 +2432,7 @@
   uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsLongConstant());
+    DCHECK(other->IsLongConstant()) << other->DebugName();
     return other->AsLongConstant()->value_ == value_;
   }
 
@@ -2449,6 +2454,92 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
+class HFloatConstant : public HConstant {
+ public:
+  float GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
+  }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsFloatConstant()) << other->DebugName();
+    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
+  }
+  bool IsZero() const OVERRIDE {
+    return value_ == 0.0f;
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(FloatConstant);
+
+ private:
+  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
+  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
+
+  const float value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
+};
+
+class HDoubleConstant : public HConstant {
+ public:
+  double GetValue() const { return value_; }
+
+  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsDoubleConstant()) << other->DebugName();
+    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+
+  bool IsMinusOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
+  }
+  bool IsZero() const OVERRIDE {
+    return value_ == 0.0;
+  }
+  bool IsOne() const OVERRIDE {
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
+  }
+
+  DECLARE_INSTRUCTION(DoubleConstant);
+
+ private:
+  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
+  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
+      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
+
+  const double value_;
+
+  // Only the SsaBuilder and HGraph can create floating-point constants.
+  friend class SsaBuilder;
+  friend class HGraph;
+  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+};
+
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
 class HIf : public HTemplateInstruction<1> {
@@ -2650,14 +2741,16 @@
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x`.
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
+  virtual HConstant* Evaluate(HFloatConstant* x) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x) const = 0;
 
   DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
@@ -2720,12 +2813,17 @@
     return true;
   }
 
-  // Try to statically evaluate `operation` and return a HConstant
-  // containing the result of this evaluation.  If `operation` cannot
+  // Try to statically evaluate `this` and return a HConstant
+  // containing the result of this evaluation.  If `this` cannot
   // be evaluated as a constant, return null.
   HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x` and `y`.
+  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                              HNullConstant* y ATTRIBUTE_UNUSED) const {
+    VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
+    return nullptr;
+  }
   virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0;
   virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED,
@@ -2738,11 +2836,8 @@
     VLOG(compiler) << DebugName() << " is not defined for the (long, int) case.";
     return nullptr;
   }
-  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
-                              HNullConstant* y ATTRIBUTE_UNUSED) const {
-    VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
-    return nullptr;
-  }
+  virtual HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const = 0;
+  virtual HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const = 0;
 
   // Returns an input that can legally be used as the right input and is
   // constant, or null.
@@ -2766,6 +2861,8 @@
   kLtBias,  // return -1 for NaN comparisons
 };
 
+std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs);
+
 class HCondition : public HBinaryOperation {
  public:
   HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
@@ -2783,7 +2880,7 @@
   virtual IfCondition GetOppositeCondition() const = 0;
 
   bool IsGtBias() const { return bias_ == ComparisonBias::kGtBias; }
-
+  ComparisonBias GetBias() const { return bias_; }
   void SetBias(ComparisonBias bias) { bias_ = bias; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
@@ -2791,17 +2888,34 @@
   }
 
   bool IsFPConditionTrueIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
     return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE);
   }
 
   bool IsFPConditionFalseIfNaN() const {
-    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType()));
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
     return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ);
   }
 
+ protected:
+  template <typename T>
+  int32_t Compare(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t CompareFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compare(x, y);
+  }
+
+  // Return an integer constant containing the result of a condition evaluated at compile time.
+  HIntConstant* MakeConstantCondition(bool value, uint32_t dex_pc) const {
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
+  }
+
  private:
   // Needed if we merge a HCompare into a HCondition.
   ComparisonBias bias_;
@@ -2817,17 +2931,25 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(1);
+    return MakeConstantCondition(true, GetDexPc());
+  }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HEqual instruction; evaluate it as
+  // `Compare(x, y) == 0`.
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0),
+                                 GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(Equal);
@@ -2853,17 +2975,24 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(0);
+    return MakeConstantCondition(false, GetDexPc());
+  }
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HNotEqual instruction; evaluate it as
+  // `Compare(x, y) != 0`.
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(NotEqual);
@@ -2888,12 +3017,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThan instruction; evaluate it as
+  // `Compare(x, y) < 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThan);
@@ -2918,12 +3054,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HLessThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) <= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(LessThanOrEqual);
@@ -2948,12 +3091,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThan instruction; evaluate it as
+  // `Compare(x, y) > 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThan);
@@ -2978,12 +3128,19 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  // In the following Evaluate methods, a HCompare instruction has
+  // been merged into this HGreaterThanOrEqual instruction; evaluate it as
+  // `Compare(x, y) >= 0`.
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
   }
 
   DECLARE_INSTRUCTION(GreaterThanOrEqual);
@@ -3008,14 +3165,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Below);
@@ -3029,7 +3192,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) < MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
@@ -3040,14 +3205,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(BelowOrEqual);
@@ -3061,7 +3232,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) <= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
@@ -3072,14 +3245,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(Above);
@@ -3093,7 +3272,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) > MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
@@ -3104,14 +3285,20 @@
       : HCondition(first, second, dex_pc) {}
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint32_t>(x->GetValue()),
-                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(static_cast<uint64_t>(x->GetValue()),
-                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+    return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
   }
 
   DECLARE_INSTRUCTION(AboveOrEqual);
@@ -3125,7 +3312,9 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> bool Compute(T x, T y) const {
+    return MakeUnsigned(x) >= MakeUnsigned(y);
+  }
 
   DISALLOW_COPY_AND_ASSIGN(HAboveOrEqual);
 };
@@ -3150,15 +3339,32 @@
   }
 
   template <typename T>
-  int32_t Compute(T x, T y) const { return x == y ? 0 : x > y ? 1 : -1; }
+  int32_t Compute(T x, T y) const { return x > y ? 1 : (x < y ? -1 : 0); }
+
+  template <typename T>
+  int32_t ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    DCHECK_NE(GetBias(), ComparisonBias::kNoBias);
+    // Handle the bias.
+    return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y);
+  }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    // Note that there is no "cmp-int" Dex instruction so we shouldn't
+    // reach this code path when processing a freshly built HIR
+    // graph. However HCompare integer instructions can be synthesized
+    // by the instruction simplifier to implement IntegerCompare and
+    // IntegerSignum intrinsics, so we have to handle this case.
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+    return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
@@ -3167,8 +3373,12 @@
 
   ComparisonBias GetBias() const { return bias_; }
 
-  bool IsGtBias() { return bias_ == ComparisonBias::kGtBias; }
-
+  // Does this compare instruction have a "gt bias" (vs an "lt bias")?
+  // Only meaninfgul for floating-point comparisons.
+  bool IsGtBias() const {
+    DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
+    return bias_ == ComparisonBias::kGtBias;
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type) {
     // MIPS64 uses a runtime call for FP comparisons.
@@ -3177,6 +3387,13 @@
 
   DECLARE_INSTRUCTION(Compare);
 
+ protected:
+  // Return an integer constant containing the result of a comparison evaluated at compile time.
+  HIntConstant* MakeConstantComparison(int32_t value, uint32_t dex_pc) const {
+    DCHECK(value == -1 || value == 0 || value == 1) << value;
+    return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc);
+  }
+
  private:
   const ComparisonBias bias_;
 
@@ -3234,92 +3451,6 @@
   DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
 };
 
-class HFloatConstant : public HConstant {
- public:
-  float GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE {
-    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
-  }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsFloatConstant());
-    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0f;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(FloatConstant);
-
- private:
-  explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {}
-  explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {}
-
-  const float value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
-};
-
-class HDoubleConstant : public HConstant {
- public:
-  double GetValue() const { return value_; }
-
-  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
-
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    DCHECK(other->IsDoubleConstant());
-    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
-  }
-
-  size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
-
-  bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
-  }
-  bool IsZero() const OVERRIDE {
-    return value_ == 0.0;
-  }
-  bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
-  }
-  bool IsNaN() const {
-    return std::isnan(value_);
-  }
-
-  DECLARE_INSTRUCTION(DoubleConstant);
-
- private:
-  explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {}
-  explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc)
-      : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {}
-
-  const double value_;
-
-  // Only the SsaBuilder and HGraph can create floating-point constants.
-  friend class SsaBuilder;
-  friend class HGraph;
-  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
-};
-
 class HNewInstance : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
@@ -3871,6 +4002,12 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Neg);
 
@@ -3937,6 +4074,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Add);
 
@@ -3962,6 +4107,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Sub);
 
@@ -3989,6 +4142,14 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   DECLARE_INSTRUCTION(Mul);
 
@@ -4005,7 +4166,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -4013,13 +4175,27 @@
     return (y == -1) ? -x : x / y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return x / y;
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -4042,7 +4218,8 @@
       : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
 
   template <typename T>
-  T Compute(T x, T y) const {
+  T ComputeIntegral(T x, T y) const {
+    DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType();
     // Our graph structure ensures we never have 0 for `y` during
     // constant folding.
     DCHECK_NE(y, 0);
@@ -4050,15 +4227,28 @@
     return (y == -1) ? 0 : x % y;
   }
 
+  template <typename T>
+  T ComputeFP(T x, T y) const {
+    DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+    return std::fmod(x, y);
+  }
+
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
   HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
   }
-
+  HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -4125,6 +4315,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Shl);
 
@@ -4161,6 +4361,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Shr);
 
@@ -4198,6 +4408,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(UShr);
 
@@ -4234,6 +4454,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(And);
 
@@ -4270,6 +4500,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Or);
 
@@ -4306,6 +4546,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Xor);
 
@@ -4344,6 +4594,16 @@
     return GetBlock()->GetGraph()->GetLongConstant(
         Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Ror);
 
@@ -4410,6 +4670,14 @@
   HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(Not);
 
@@ -4428,7 +4696,7 @@
   }
 
   template <typename T> bool Compute(T x) const {
-    DCHECK(IsUint<1>(x));
+    DCHECK(IsUint<1>(x)) << x;
     return !x;
   }
 
@@ -4439,6 +4707,14 @@
     LOG(FATAL) << DebugName() << " is not defined for long values";
     UNREACHABLE();
   }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
 
   DECLARE_INSTRUCTION(BooleanNot);
 
@@ -4786,10 +5062,10 @@
       DCHECK_EQ(GetArray(), other->GetArray());
       DCHECK_EQ(GetIndex(), other->GetIndex());
       if (Primitive::IsIntOrLongType(GetType())) {
-        DCHECK(Primitive::IsFloatingPointType(other->GetType()));
+        DCHECK(Primitive::IsFloatingPointType(other->GetType())) << other->GetType();
       } else {
-        DCHECK(Primitive::IsFloatingPointType(GetType()));
-        DCHECK(Primitive::IsIntOrLongType(other->GetType()));
+        DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType();
+        DCHECK(Primitive::IsIntOrLongType(other->GetType())) << other->GetType();
       }
     }
     return result;
@@ -6004,7 +6280,7 @@
   } else if (constant->IsLongConstant()) {
     return constant->AsLongConstant()->GetValue();
   } else {
-    DCHECK(constant->IsNullConstant());
+    DCHECK(constant->IsNullConstant()) << constant->DebugName();
     return 0;
   }
 }
@@ -6099,6 +6375,18 @@
   DISALLOW_COPY_AND_ASSIGN(SwitchTable);
 };
 
+// Create space in `blocks` for adding `number_of_new_blocks` entries
+// starting at location `at`. Blocks after `at` are moved accordingly.
+inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
+                        size_t number_of_new_blocks,
+                        size_t after) {
+  DCHECK_LT(after, blocks->size());
+  size_t old_size = blocks->size();
+  size_t new_size = old_size + number_of_new_blocks;
+  blocks->resize(new_size);
+  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 12b748b..b1891c9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -505,12 +505,12 @@
       graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
-  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph);
+  HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
@@ -519,7 +519,7 @@
       graph, stats, "instruction_simplifier_after_bce");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
-  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -651,7 +651,7 @@
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
-    compiler_driver->GetVerifiedMethod(&dex_file, method_idx), dex_cache);
+    nullptr, dex_cache);
 
   bool requires_barrier = dex_compilation_unit.IsConstructor()
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 52a7b10..179004b 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -56,6 +56,10 @@
   kMonomorphicCall,
   kPolymorphicCall,
   kMegamorphicCall,
+  kBooleanSimplified,
+  kIntrinsicRecognized,
+  kLoopInvariantMoved,
+  kSelectGenerated,
   kLastStat
 };
 
@@ -124,7 +128,11 @@
       case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
       case kMonomorphicCall: name = "MonomorphicCall"; break;
       case kPolymorphicCall: name = "PolymorphicCall"; break;
-      case kMegamorphicCall: name = "kMegamorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
+      case kBooleanSimplified : name = "BooleanSimplified"; break;
+      case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
+      case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
+      case kSelectGenerated : name = "SelectGenerated"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 324d84f..0ad104e 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -138,15 +138,7 @@
   }
 
   if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
-    if (GetGraph()->GetInstructionSet() == kX86) {
-      // Long values and long condition inputs result in 8 required core registers.
-      // We don't have that many on x86. Materialize the condition in such case.
-      return user->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->IsConstant();
-    } else {
-      return true;
-    }
+    return true;
   }
 
   return false;
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 105b30a..e52476e 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -141,6 +141,8 @@
       block->MergeWith(merge_block);
     }
 
+    MaybeRecordStat(MethodCompilationStat::kSelectGenerated);
+
     // No need to update dominance information, as we are simplifying
     // a simple diamond shape, where the join block is merged with the
     // entry block. Any following blocks would have had the join block
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index f9d6d4d..c6dca58 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -47,8 +47,8 @@
 
 class HSelectGenerator : public HOptimization {
  public:
-  explicit HSelectGenerator(HGraph* graph)
-    : HOptimization(graph, kSelectGeneratorPassName) {}
+  HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, kSelectGeneratorPassName, stats) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 43f2499..09ca8b7 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@
   return true;
 }
 
+static bool HasAliasInEnvironments(HInstruction* instruction) {
+  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+       !use_it.Done();
+       use_it.Advance()) {
+    HEnvironment* use = use_it.Current()->GetUser();
+    HUseListNode<HEnvironment*>* next = use_it.Current()->GetNext();
+    if (next != nullptr && next->GetUser() == use) {
+      return true;
+    }
+  }
+
+  if (kIsDebugBuild) {
+    // Do a quadratic search to ensure same environment uses are next
+    // to each other.
+    for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+         !use_it.Done();
+         use_it.Advance()) {
+      HUseListNode<HEnvironment*>* current = use_it.Current();
+      HUseListNode<HEnvironment*>* next = current->GetNext();
+      while (next != nullptr) {
+        DCHECK(next->GetUser() != current->GetUser());
+        next = next->GetNext();
+      }
+    }
+  }
+  return false;
+}
+
 void SsaBuilder::RemoveRedundantUninitializedStrings() {
   if (GetGraph()->IsDebuggable()) {
     // Do not perform the optimization for consistency with the interpreter
@@ -433,7 +461,7 @@
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
-    if (!new_instance->HasNonEnvironmentUses()) {
+    if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
       new_instance->ReplaceWith(GetGraph()->GetNullConstant());
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
deleted file mode 100644
index 85335ef..0000000
--- a/compiler/profile_assistant.cc
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "profile_assistant.h"
-
-#include "base/unix_file/fd_file.h"
-#include "os.h"
-
-namespace art {
-
-// Minimum number of new methods that profiles must contain to enable recompilation.
-static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
-
-bool ProfileAssistant::ProcessProfilesInternal(
-        const std::vector<ScopedFlock>& profile_files,
-        const std::vector<ScopedFlock>& reference_profile_files,
-        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
-  DCHECK(!profile_files.empty());
-  DCHECK(!reference_profile_files.empty() ||
-      (profile_files.size() == reference_profile_files.size()));
-
-  std::vector<ProfileCompilationInfo> new_info(profile_files.size());
-  bool should_compile = false;
-  // Read the main profile files.
-  for (size_t i = 0; i < new_info.size(); i++) {
-    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
-      LOG(WARNING) << "Could not load profile file at index " << i;
-      return false;
-    }
-    // Do we have enough new profiled methods that will make the compilation worthwhile?
-    should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
-  }
-
-  if (!should_compile) {
-    return true;
-  }
-
-  std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
-  // Merge information.
-  for (size_t i = 0; i < new_info.size(); i++) {
-    if (!reference_profile_files.empty()) {
-      if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
-        LOG(WARNING) << "Could not load reference profile file at index " << i;
-        return false;
-      }
-    }
-    // Merge all data into a single object.
-    if (!result->Load(new_info[i])) {
-      LOG(WARNING) << "Could not merge profile data at index " << i;
-      return false;
-    }
-  }
-  // We were successful in merging all profile information. Update the files.
-  for (size_t i = 0; i < new_info.size(); i++) {
-    if (!reference_profile_files.empty()) {
-      if (!reference_profile_files[i].GetFile()->ClearContent()) {
-        PLOG(WARNING) << "Could not clear reference profile file at index " << i;
-        return false;
-      }
-      if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
-        LOG(WARNING) << "Could not save reference profile file at index " << i;
-        return false;
-      }
-      if (!profile_files[i].GetFile()->ClearContent()) {
-        PLOG(WARNING) << "Could not clear profile file at index " << i;
-        return false;
-      }
-    }
-  }
-
-  *profile_compilation_info = result.release();
-  return true;
-}
-
-class ScopedCollectionFlock {
- public:
-  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
-
-  // Will block until all the locks are acquired.
-  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
-    for (size_t i = 0; i < filenames.size(); i++) {
-      if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
-        *error += " (index=" + std::to_string(i) + ")";
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Will block until all the locks are acquired.
-  bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
-    for (size_t i = 0; i < fds.size(); i++) {
-      // We do not own the descriptor, so disable auto-close and don't check usage.
-      File file(fds[i], false);
-      file.DisableAutoClose();
-      if (!flocks_[i].Init(&file, error)) {
-        *error += " (index=" + std::to_string(i) + ")";
-        return false;
-      }
-    }
-    return true;
-  }
-
-  const std::vector<ScopedFlock>& Get() const { return flocks_; }
-
- private:
-  std::vector<ScopedFlock> flocks_;
-};
-
-bool ProfileAssistant::ProcessProfiles(
-        const std::vector<uint32_t>& profile_files_fd,
-        const std::vector<uint32_t>& reference_profile_files_fd,
-        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
-  *profile_compilation_info = nullptr;
-
-  std::string error;
-  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
-  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
-    LOG(WARNING) << "Could not lock profile files: " << error;
-    return false;
-  }
-  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
-  if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
-    LOG(WARNING) << "Could not lock reference profile files: " << error;
-    return false;
-  }
-
-  return ProcessProfilesInternal(profile_files_flocks.Get(),
-                                 reference_profile_files_flocks.Get(),
-                                 profile_compilation_info);
-}
-
-bool ProfileAssistant::ProcessProfiles(
-        const std::vector<std::string>& profile_files,
-        const std::vector<std::string>& reference_profile_files,
-        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
-  *profile_compilation_info = nullptr;
-
-  std::string error;
-  ScopedCollectionFlock profile_files_flocks(profile_files.size());
-  if (!profile_files_flocks.Init(profile_files, &error)) {
-    LOG(WARNING) << "Could not lock profile files: " << error;
-    return false;
-  }
-  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
-  if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
-    LOG(WARNING) << "Could not lock reference profile files: " << error;
-    return false;
-  }
-
-  return ProcessProfilesInternal(profile_files_flocks.Get(),
-                                 reference_profile_files_flocks.Get(),
-                                 profile_compilation_info);
-}
-
-}  // namespace art
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
deleted file mode 100644
index ad5e216..0000000
--- a/compiler/profile_assistant.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_
-#define ART_COMPILER_PROFILE_ASSISTANT_H_
-
-#include <string>
-#include <vector>
-
-#include "base/scoped_flock.h"
-#include "jit/offline_profiling_info.cc"
-
-namespace art {
-
-class ProfileAssistant {
- public:
-  // Process the profile information present in the given files. Returns true
-  // if the analysis ended up successfully (i.e. no errors during reading,
-  // merging or writing of profile files).
-  //
-  // If the returned value is true and there is a significant difference between
-  // profile_files and reference_profile_files:
-  //   - profile_compilation_info is set to a not null object that
-  //     can be used to drive compilation. It will be the merge of all the data
-  //     found in profile_files and reference_profile_files.
-  //   - the data from profile_files[i] is merged into
-  //     reference_profile_files[i] and the corresponding backing file is
-  //     updated.
-  //
-  // If the returned value is false or the difference is insignificant,
-  // profile_compilation_info will be set to null.
-  //
-  // Additional notes:
-  //   - as mentioned above, this function may update the content of the files
-  //     passed with the reference_profile_files.
-  //   - if reference_profile_files is not empty it must be the same size as
-  //     profile_files.
-  static bool ProcessProfiles(
-      const std::vector<std::string>& profile_files,
-      const std::vector<std::string>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info);
-
-  static bool ProcessProfiles(
-      const std::vector<uint32_t>& profile_files_fd_,
-      const std::vector<uint32_t>& reference_profile_files_fd_,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info);
-
- private:
-  static bool ProcessProfilesInternal(
-      const std::vector<ScopedFlock>& profile_files,
-      const std::vector<ScopedFlock>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info);
-
-  DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_PROFILE_ASSISTANT_H_
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
deleted file mode 100644
index 58b7513..0000000
--- a/compiler/profile_assistant_test.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "base/unix_file/fd_file.h"
-#include "common_runtime_test.h"
-#include "compiler/profile_assistant.h"
-#include "jit/offline_profiling_info.h"
-
-namespace art {
-
-class ProfileAssistantTest : public CommonRuntimeTest {
- protected:
-  void SetupProfile(const std::string& id,
-                    uint32_t checksum,
-                    uint16_t number_of_methods,
-                    const ScratchFile& profile,
-                    ProfileCompilationInfo* info,
-                    uint16_t start_method_index = 0) {
-    std::string dex_location1 = "location1" + id;
-    uint32_t dex_location_checksum1 = checksum;
-    std::string dex_location2 = "location2" + id;
-    uint32_t dex_location_checksum2 = 10 * checksum;
-    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
-      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
-      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
-    }
-    ASSERT_TRUE(info->Save(GetFd(profile)));
-    ASSERT_EQ(0, profile.GetFile()->Flush());
-    ASSERT_TRUE(profile.GetFile()->ResetOffset());
-  }
-
-  uint32_t GetFd(const ScratchFile& file) const {
-    return static_cast<uint32_t>(file.GetFd());
-  }
-};
-
-TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
-  ScratchFile profile1;
-  ScratchFile profile2;
-  ScratchFile reference_profile1;
-  ScratchFile reference_profile2;
-
-  std::vector<uint32_t> profile_fds({
-      GetFd(profile1),
-      GetFd(profile2)});
-  std::vector<uint32_t> reference_profile_fds({
-      GetFd(reference_profile1),
-      GetFd(reference_profile2)});
-
-  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
-  ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
-  ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
-
-  // We should advise compilation.
-  ProfileCompilationInfo* result;
-  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
-  ASSERT_TRUE(result != nullptr);
-
-  // The resulting compilation info must be equal to the merge of the inputs.
-  ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
-  ASSERT_TRUE(expected.Equals(*result));
-
-  // The information from profiles must be transfered to the reference profiles.
-  ProfileCompilationInfo file_info1;
-  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
-  ASSERT_TRUE(file_info1.Equals(info1));
-
-  ProfileCompilationInfo file_info2;
-  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
-  ASSERT_TRUE(file_info2.Equals(info2));
-
-  // Initial profiles must be cleared.
-  ASSERT_EQ(0, profile1.GetFile()->GetLength());
-  ASSERT_EQ(0, profile2.GetFile()->GetLength());
-}
-
-TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
-  ScratchFile profile1;
-  ScratchFile profile2;
-  ScratchFile reference_profile1;
-  ScratchFile reference_profile2;
-
-  std::vector<uint32_t> profile_fds({
-      GetFd(profile1),
-      GetFd(profile2)});
-  std::vector<uint32_t> reference_profile_fds({
-      GetFd(reference_profile1),
-      GetFd(reference_profile2)});
-
-  // The new profile info will contain the methods with indices 0-100.
-  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
-  ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
-  ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
-
-
-  // The reference profile info will contain the methods with indices 50-150.
-  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
-  ProfileCompilationInfo reference_info1;
-  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
-      &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
-  ProfileCompilationInfo reference_info2;
-  SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
-      &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
-
-  // We should advise compilation.
-  ProfileCompilationInfo* result;
-  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
-  ASSERT_TRUE(result != nullptr);
-
-  // The resulting compilation info must be equal to the merge of the inputs
-  ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
-  ASSERT_TRUE(expected.Load(reference_info1));
-  ASSERT_TRUE(expected.Load(reference_info2));
-  ASSERT_TRUE(expected.Equals(*result));
-
-  // The information from profiles must be transfered to the reference profiles.
-  ProfileCompilationInfo file_info1;
-  ProfileCompilationInfo merge1;
-  ASSERT_TRUE(merge1.Load(info1));
-  ASSERT_TRUE(merge1.Load(reference_info1));
-  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
-  ASSERT_TRUE(file_info1.Equals(merge1));
-
-  ProfileCompilationInfo file_info2;
-  ProfileCompilationInfo merge2;
-  ASSERT_TRUE(merge2.Load(info2));
-  ASSERT_TRUE(merge2.Load(reference_info2));
-  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
-  ASSERT_TRUE(file_info2.Equals(merge2));
-
-  // Initial profiles must be cleared.
-  ASSERT_EQ(0, profile1.GetFile()->GetLength());
-  ASSERT_EQ(0, profile2.GetFile()->GetLength());
-}
-
-TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
-  ScratchFile profile1;
-  ScratchFile profile2;
-  ScratchFile reference_profile1;
-  ScratchFile reference_profile2;
-
-  std::vector<uint32_t> profile_fds({
-      GetFd(profile1),
-      GetFd(profile2)});
-  std::vector<uint32_t> reference_profile_fds({
-      GetFd(reference_profile1),
-      GetFd(reference_profile2)});
-
-  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
-  ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
-  ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
-
-  // We should not advise compilation.
-  ProfileCompilationInfo* result = nullptr;
-  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
-  ASSERT_TRUE(result == nullptr);
-
-  // The information from profiles must remain the same.
-  ProfileCompilationInfo file_info1;
-  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
-  ASSERT_TRUE(file_info1.Equals(info1));
-
-  ProfileCompilationInfo file_info2;
-  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
-  ASSERT_TRUE(file_info2.Equals(info2));
-
-  // Reference profile files must remain empty.
-  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
-  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
-}
-
-TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
-  ScratchFile profile1;
-  ScratchFile profile2;
-  ScratchFile reference_profile1;
-  ScratchFile reference_profile2;
-
-  std::vector<uint32_t> profile_fds({
-      GetFd(profile1),
-      GetFd(profile2)});
-  std::vector<uint32_t> reference_profile_fds({
-      GetFd(reference_profile1),
-      GetFd(reference_profile2)});
-
-  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
-  // Assign different hashes for the same dex file. This will make merging of information to fail.
-  ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
-  ProfileCompilationInfo info2;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
-
-  // We should fail processing.
-  ProfileCompilationInfo* result = nullptr;
-  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
-  ASSERT_TRUE(result == nullptr);
-
-  // The information from profiles must still remain the same.
-  ProfileCompilationInfo file_info1;
-  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
-  ASSERT_TRUE(file_info1.Equals(info1));
-
-  ProfileCompilationInfo file_info2;
-  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
-  ASSERT_TRUE(file_info2.Equals(info2));
-
-  // Reference profile files must still remain empty.
-  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
-  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
-}
-
-TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
-  ScratchFile profile1;
-  ScratchFile reference_profile;
-
-  std::vector<uint32_t> profile_fds({
-      GetFd(profile1)});
-  std::vector<uint32_t> reference_profile_fds({
-      GetFd(reference_profile)});
-
-  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
-  // Assign different hashes for the same dex file. This will make merging of information to fail.
-  ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
-  ProfileCompilationInfo reference_info;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
-
-  // We should not advise compilation.
-  ProfileCompilationInfo* result = nullptr;
-  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
-  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
-  ASSERT_TRUE(result == nullptr);
-
-  // The information from profiles must still remain the same.
-  ProfileCompilationInfo file_info1;
-  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
-  ASSERT_TRUE(file_info1.Equals(info1));
-
-  ProfileCompilationInfo file_info2;
-  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
-  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
-  ASSERT_TRUE(file_info2.Equals(reference_info));
-}
-
-}  // namespace art
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ac9c097..6fd65ee 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -426,6 +426,16 @@
   EmitI(0x23, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x22, rs, rt, imm16);
+}
+
+void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x26, rs, rt, imm16);
+}
+
 void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x24, rs, rt, imm16);
 }
@@ -465,6 +475,16 @@
   EmitI(0x2b, rs, rt, imm16);
 }
 
+void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2a, rs, rt, imm16);
+}
+
+void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) {
+  CHECK(!IsR6());
+  EmitI(0x2e, rs, rt, imm16);
+}
+
 void MipsAssembler::Slt(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x2a);
 }
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 01c6490..2262af4 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -162,6 +162,8 @@
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
   void Lw(Register rt, Register rs, uint16_t imm16);
+  void Lwl(Register rt, Register rs, uint16_t imm16);
+  void Lwr(Register rt, Register rs, uint16_t imm16);
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
   void Lui(Register rt, uint16_t imm16);
@@ -172,6 +174,8 @@
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
   void Sw(Register rt, Register rs, uint16_t imm16);
+  void Swl(Register rt, Register rs, uint16_t imm16);
+  void Swr(Register rt, Register rs, uint16_t imm16);
 
   void Slt(Register rd, Register rs, Register rt);
   void Sltu(Register rd, Register rs, Register rt);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 5fc3dee..9e27f07 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -335,6 +335,18 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "Nor");
 }
 
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPSTest, Movz) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movz, "movz ${reg1}, ${reg2}, ${reg3}"), "Movz");
+}
+
+TEST_F(AssemblerMIPSTest, Movn) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Movn, "movn ${reg1}, ${reg2}, ${reg3}"), "Movn");
+}
+
 TEST_F(AssemblerMIPSTest, Seb) {
   DriverStr(RepeatRR(&mips::MipsAssembler::Seb, "seb ${reg1}, ${reg2}"), "Seb");
 }
@@ -363,6 +375,10 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "Srlv");
 }
 
+TEST_F(AssemblerMIPSTest, Rotrv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Rotrv, "rotrv ${reg1}, ${reg2}, ${reg3}"), "rotrv");
+}
+
 TEST_F(AssemblerMIPSTest, Srav) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
 }
@@ -405,6 +421,14 @@
   DriverStr(expected, "Ext");
 }
 
+TEST_F(AssemblerMIPSTest, ClzR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::ClzR2, "clz ${reg1}, ${reg2}"), "clzR2");
+}
+
+TEST_F(AssemblerMIPSTest, CloR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::CloR2, "clo ${reg1}, ${reg2}"), "cloR2");
+}
+
 TEST_F(AssemblerMIPSTest, Lb) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
 }
@@ -413,10 +437,18 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "Lh");
 }
 
+TEST_F(AssemblerMIPSTest, Lwl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwl, -16, "lwl ${reg1}, {imm}(${reg2})"), "Lwl");
+}
+
 TEST_F(AssemblerMIPSTest, Lw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "Lw");
 }
 
+TEST_F(AssemblerMIPSTest, Lwr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lwr, -16, "lwr ${reg1}, {imm}(${reg2})"), "Lwr");
+}
+
 TEST_F(AssemblerMIPSTest, Lbu) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "Lbu");
 }
@@ -445,10 +477,18 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "Sh");
 }
 
+TEST_F(AssemblerMIPSTest, Swl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swl, -16, "swl ${reg1}, {imm}(${reg2})"), "Swl");
+}
+
 TEST_F(AssemblerMIPSTest, Sw) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "Sw");
 }
 
+TEST_F(AssemblerMIPSTest, Swr) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Swr, -16, "swr ${reg1}, {imm}(${reg2})"), "Swr");
+}
+
 TEST_F(AssemblerMIPSTest, Slt) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "Slt");
 }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 541fb5a..f7efdc5 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -40,6 +40,7 @@
 #include "art_method-inl.h"
 #include "base/dumpable.h"
 #include "base/macros.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
@@ -71,7 +72,6 @@
 #include "mirror/object_array-inl.h"
 #include "oat_writer.h"
 #include "os.h"
-#include "profile_assistant.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "ScopedLocalRef.h"
@@ -339,23 +339,10 @@
   UsageError("      Example: --runtime-arg -Xms256m");
   UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
-  UsageError("      Can be specified multiple time, in which case the data from the different");
-  UsageError("      profiles will be aggregated.");
-  UsageError("");
-  UsageError("  --reference-profile-file=<filename>: specify a reference profile file to use when");
-  UsageError("      compiling. The data in this file will be compared with the data in the");
-  UsageError("      associated --profile-file and the compilation will proceed only if there is");
-  UsageError("      a significant difference (--reference-profile-file is paired with");
-  UsageError("      --profile-file in the natural order). If the compilation was attempted then");
-  UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
-  UsageError("      specified together with --profile-file.");
   UsageError("");
   UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
   UsageError("      Cannot be used together with --profile-file.");
   UsageError("");
-  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
-  UsageError("      accepts a file descriptor. Cannot be used together with");
-  UsageError("       --reference-profile-file.");
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
   UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
@@ -517,14 +504,6 @@
   return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
 }
 
-static void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
-  for (size_t i = 0; i < fds.size(); i++) {
-    if (close(fds[i]) < 0) {
-      PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
-    }
-  }
-}
-
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
@@ -567,10 +546,12 @@
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
-      swap_fd_(-1),
+      swap_fd_(kInvalidFd),
       app_image_fd_(kInvalidFd),
+      profile_file_fd_(kInvalidFd),
       timings_(timings),
-      force_determinism_(false) {}
+      force_determinism_(false)
+      {}
 
   ~Dex2Oat() {
     // Log completion time before deleting the runtime_, because this accesses
@@ -824,25 +805,8 @@
       }
     }
 
-    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
-      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
-    }
-    if (!profile_files_.empty()) {
-      if (!reference_profile_files_.empty() &&
-          (reference_profile_files_.size() != profile_files_.size())) {
-        Usage("If specified, --reference-profile-file should match the number of --profile-file.");
-      }
-    } else if (!reference_profile_files_.empty()) {
-      Usage("--reference-profile-file should only be supplied with --profile-file");
-    }
-    if (!profile_files_fd_.empty()) {
-      if (!reference_profile_files_fd_.empty() &&
-          (reference_profile_files_fd_.size() != profile_files_fd_.size())) {
-        Usage("If specified, --reference-profile-file-fd should match the number",
-              " of --profile-file-fd.");
-      }
-    } else if (!reference_profile_files_fd_.empty()) {
-      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
+    if (!profile_file_.empty() && (profile_file_fd_ != kInvalidFd)) {
+      Usage("Profile file should not be specified with both --profile-file-fd and --profile-file");
     }
 
     if (!parser_options->oat_symbols.empty()) {
@@ -1153,16 +1117,9 @@
       } else if (option.starts_with("--compiler-backend=")) {
         ParseCompilerBackend(option, parser_options.get());
       } else if (option.starts_with("--profile-file=")) {
-        profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
-      } else if (option.starts_with("--reference-profile-file=")) {
-        reference_profile_files_.push_back(
-            option.substr(strlen("--reference-profile-file=")).ToString());
+        profile_file_ = option.substr(strlen("--profile-file=")).ToString();
       } else if (option.starts_with("--profile-file-fd=")) {
-        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
-      } else if (option.starts_with("--reference-profile-file-fd=")) {
-        ParseFdForCollection(option, "--reference_profile-file-fd", &reference_profile_files_fd_);
-      } else if (option == "--no-profile-file") {
-        // No profile
+        ParseUintOption(option, "--profile-file-fd", &profile_file_fd_, Usage);
       } else if (option == "--host") {
         is_host_ = true;
       } else if (option == "--runtime-arg") {
@@ -1865,33 +1822,44 @@
   }
 
   bool UseProfileGuidedCompilation() const {
-    return !profile_files_.empty() || !profile_files_fd_.empty();
+    return !profile_file_.empty() || (profile_file_fd_ != kInvalidFd);
   }
 
-  bool ProcessProfiles() {
+  bool LoadProfile() {
     DCHECK(UseProfileGuidedCompilation());
-    ProfileCompilationInfo* info = nullptr;
-    bool result = false;
-    if (profile_files_.empty()) {
-      DCHECK(!profile_files_fd_.empty());
-      result = ProfileAssistant::ProcessProfiles(
-          profile_files_fd_, reference_profile_files_fd_, &info);
-      CloseAllFds(profile_files_fd_, "profile_files_fd_");
-      CloseAllFds(reference_profile_files_fd_, "reference_profile_files_fd_");
+
+    profile_compilation_info_.reset(new ProfileCompilationInfo());
+    ScopedFlock flock;
+    bool success = false;
+    std::string error;
+    if (profile_file_fd_ != -1) {
+      // The file doesn't need to be flushed so don't check the usage.
+      // Pass a bogus path so that we can easily attribute any reported error.
+      File file(profile_file_fd_, "profile", /*check_usage*/ false, /*read_only_mode*/ true);
+      if (flock.Init(&file, &error)) {
+        success = profile_compilation_info_->Load(profile_file_fd_);
+      }
     } else {
-      result = ProfileAssistant::ProcessProfiles(
-          profile_files_, reference_profile_files_, &info);
+      if (flock.Init(profile_file_.c_str(), O_RDONLY, /* block */ true, &error)) {
+        success = profile_compilation_info_->Load(flock.GetFile()->Fd());
+      }
+    }
+    if (!error.empty()) {
+      LOG(WARNING) << "Cannot lock profiles: " << error;
     }
 
-    profile_compilation_info_.reset(info);
+    if (!success) {
+      profile_compilation_info_.reset(nullptr);
+    }
 
-    return result;
+    return success;
   }
 
   bool ShouldCompileBasedOnProfiles() const {
     DCHECK(UseProfileGuidedCompilation());
-    // If we are given profiles, compile only if we have new information.
-    return profile_compilation_info_ != nullptr;
+    // If we are given a profile, compile only if we have some data in it.
+    return (profile_compilation_info_ != nullptr) &&
+        (profile_compilation_info_->GetNumberOfMethods() != 0);
   }
 
  private:
@@ -2460,10 +2428,8 @@
   int swap_fd_;
   std::string app_image_file_name_;
   int app_image_fd_;
-  std::vector<std::string> profile_files_;
-  std::vector<std::string> reference_profile_files_;
-  std::vector<uint32_t> profile_files_fd_;
-  std::vector<uint32_t> reference_profile_files_fd_;
+  std::string profile_file_;
+  int profile_file_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
@@ -2592,7 +2558,7 @@
   // Process profile information and assess if we need to do a profile guided compilation.
   // This operation involves I/O.
   if (dex2oat.UseProfileGuidedCompilation()) {
-    if (dex2oat.ProcessProfiles()) {
+    if (dex2oat.LoadProfile()) {
       if (!dex2oat.ShouldCompileBasedOnProfiles()) {
         LOG(INFO) << "Skipped compilation because of insignificant profile delta";
         return EXIT_SUCCESS;
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index f922687..428266f 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -306,13 +306,17 @@
 
   { kITypeMask, 32u << kOpcodeShift, "lb", "TO", },
   { kITypeMask, 33u << kOpcodeShift, "lh", "TO", },
+  { kITypeMask, 34u << kOpcodeShift, "lwl", "TO", },
   { kITypeMask, 35u << kOpcodeShift, "lw", "TO", },
   { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", },
   { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", },
+  { kITypeMask, 38u << kOpcodeShift, "lwr", "TO", },
   { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", },
   { kITypeMask, 40u << kOpcodeShift, "sb", "TO", },
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
+  { kITypeMask, 42u << kOpcodeShift, "swl", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
+  { kITypeMask, 46u << kOpcodeShift, "swr", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
   { kJTypeMask, 50u << kOpcodeShift, "bc", "P" },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
diff --git a/profman/Android.mk b/profman/Android.mk
new file mode 100644
index 0000000..d38d107
--- /dev/null
+++ b/profman/Android.mk
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.executable.mk
+
+PROFMAN_SRC_FILES := \
+	profman.cc \
+	profile_assistant.cc
+
+# TODO: Remove this when the framework (installd) supports pushing the
+# right instruction-set parameter for the primary architecture.
+ifneq ($(filter ro.zygote=zygote64,$(PRODUCT_DEFAULT_PROPERTY_OVERRIDES)),)
+  profman_arch := 64
+else
+  profman_arch := 32
+endif
+
+ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,target,ndebug,$(profman_arch)))
+endif
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,target,debug,$(profman_arch)))
+endif
+
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-art-executable,profman,$(PROFMAN_SRC_FILES),libcutils,art/profman,host,debug))
+endif
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
new file mode 100644
index 0000000..58e8a3a
--- /dev/null
+++ b/profman/profile_assistant.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_assistant.h"
+
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
+namespace art {
+
+// Minimum number of new methods that profiles must contain to enable recompilation.
+static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const ScopedFlock& reference_profile_file) {
+  DCHECK(!profile_files.empty());
+
+  std::vector<ProfileCompilationInfo> new_info(profile_files.size());
+  bool should_compile = false;
+  // Read the main profile files.
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
+      return kErrorBadProfiles;
+    }
+    // Do we have enough new profiled methods that will make the compilation worthwhile?
+    should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
+  }
+
+  if (!should_compile) {
+    return kSkipCompilation;
+  }
+
+  // Merge information.
+  ProfileCompilationInfo info;
+  if (!info.Load(reference_profile_file.GetFile()->Fd())) {
+    LOG(WARNING) << "Could not load reference profile file";
+    return kErrorBadProfiles;
+  }
+
+  for (size_t i = 0; i < new_info.size(); i++) {
+    // Merge all data into a single object.
+    if (!info.Load(new_info[i])) {
+      LOG(WARNING) << "Could not merge profile data at index " << i;
+      return kErrorBadProfiles;
+    }
+  }
+  // We were successful in merging all profile information. Update the reference profile.
+  if (!reference_profile_file.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear reference profile file";
+    return kErrorIO;
+  }
+  if (!info.Save(reference_profile_file.GetFile()->Fd())) {
+    LOG(WARNING) << "Could not save reference profile file";
+    return kErrorIO;
+  }
+
+  return kCompile;
+}
+
+static bool InitFlock(const std::string& filename, ScopedFlock& flock, std::string* error) {
+  return flock.Init(filename.c_str(), O_RDWR, /* block */ true, error);
+}
+
+static bool InitFlock(int fd, ScopedFlock& flock, std::string* error) {
+  DCHECK_GE(fd, 0);
+  // We do not own the descriptor, so disable auto-close and don't check usage.
+  File file(fd, false);
+  file.DisableAutoClose();
+  return flock.Init(&file, error);
+}
+
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!InitFlock(filenames[i], flocks_[i], error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<int>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      DCHECK_GE(fds[i], 0);
+      if (!InitFlock(fds[i], flocks_[i], error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
+        const std::vector<int>& profile_files_fd,
+        int reference_profile_file_fd) {
+  DCHECK_GE(reference_profile_file_fd, 0);
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return kErrorCannotLock;
+  }
+  ScopedFlock reference_profile_file_flock;
+  if (!InitFlock(reference_profile_file_fd, reference_profile_file_flock, &error)) {
+    LOG(WARNING) << "Could not lock reference profiled files: " << error;
+    return kErrorCannotLock;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_file_flock);
+}
+
+ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::string& reference_profile_file) {
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return kErrorCannotLock;
+  }
+  ScopedFlock reference_profile_file_flock;
+  if (!InitFlock(reference_profile_file, reference_profile_file_flock, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return kErrorCannotLock;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_file_flock);
+}
+
+}  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
new file mode 100644
index 0000000..d3c75b8
--- /dev/null
+++ b/profman/profile_assistant.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_PROFMAN_PROFILE_ASSISTANT_H_
+#define ART_PROFMAN_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "base/scoped_flock.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+  // These also serve as return codes of profman and are processed by installd
+  // (frameworks/native/cmds/installd/commands.cpp)
+  enum ProcessingResult {
+    kCompile = 0,
+    kSkipCompilation = 1,
+    kErrorBadProfiles = 2,
+    kErrorIO = 3,
+    kErrorCannotLock = 4
+  };
+
+  // Process the profile information present in the given files. Returns one of
+  // ProcessingResult values depending on profile information and whether or not
+  // the analysis ended up successfully (i.e. no errors during reading,
+  // merging or writing of profile files).
+  //
+  // When the returned value is kCompile there is a significant difference
+  // between profile_files and reference_profile_files. In this case
+  // reference_profile will be updated with the profiling info obtain after
+  // merging all profiles.
+  //
+  // When the returned value is kSkipCompilation, the difference between the
+  // merge of the current profiles and the reference one is insignificant. In
+  // this case no file will be updated.
+  //
+  static ProcessingResult ProcessProfiles(
+      const std::vector<std::string>& profile_files,
+      const std::string& reference_profile_file);
+
+  static ProcessingResult ProcessProfiles(
+      const std::vector<int>& profile_files_fd_,
+      int reference_profile_file_fd);
+
+ private:
+  static ProcessingResult ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const ScopedFlock& reference_profile_file);
+
+  DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
+}  // namespace art
+
+#endif  // ART_PROFMAN_PROFILE_ASSISTANT_H_
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
new file mode 100644
index 0000000..543be5d
--- /dev/null
+++ b/profman/profile_assistant_test.cc
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+#include "utils.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  int GetFd(const ScratchFile& file) const {
+    return static_cast<int>(file.GetFd());
+  }
+
+  void CheckProfileInfo(ScratchFile& file, const ProfileCompilationInfo& info) {
+    ProfileCompilationInfo file_info;
+    ASSERT_TRUE(file.GetFile()->ResetOffset());
+    ASSERT_TRUE(file_info.Load(GetFd(file)));
+    ASSERT_TRUE(file_info.Equals(info));
+  }
+
+    // Runs test with given arguments.
+  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+    std::string file_path = GetTestAndroidRoot();
+    if (IsHost()) {
+      file_path += "/bin/profman";
+    } else {
+      file_path += "/xbin/profman";
+    }
+    if (kIsDebugBuild) {
+      file_path += "d";
+    }
+
+    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+    std::vector<std::string> argv_str;
+    argv_str.push_back(file_path);
+    for (size_t k = 0; k < profiles_fd.size(); k++) {
+      argv_str.push_back("--profile-file-fd=" + std::to_string(profiles_fd[k]));
+    }
+    argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile_fd));
+
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  // The new profile info will contain the methods with indices 0-100.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+  // The reference profile info will contain the methods with indices 50-150.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile,
+      &reference_info, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The resulting compilation info must be equal to the merge of the inputs
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Load(reference_info));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+  // We should not advise compilation.
+  ASSERT_EQ(ProfileAssistant::kSkipCompilation,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile.GetFile()->GetLength());
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of information to fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should fail processing.
+  ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+  CheckProfileInfo(profile2, info2);
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of information to fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+  // We should not advise compilation.
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+}
+
+}  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
new file mode 100644
index 0000000..7c9e449
--- /dev/null
+++ b/profman/profman.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "base/dumpable.h"
+#include "base/scoped_flock.h"
+#include "base/stringpiece.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+#include "base/unix_file/fd_file.h"
+#include "jit/offline_profiling_info.h"
+#include "utils.h"
+#include "profile_assistant.h"
+
+namespace art {
+
+static int original_argc;
+static char** original_argv;
+
+static std::string CommandLine() {
+  std::vector<std::string> command;
+  for (int i = 0; i < original_argc; ++i) {
+    command.push_back(original_argv[i]);
+  }
+  return Join(command, ' ');
+}
+
+static void UsageErrorV(const char* fmt, va_list ap) {
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  LOG(ERROR) << error;
+}
+
+static void UsageError(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  UsageErrorV(fmt, ap);
+  va_end(ap);
+}
+
+NO_RETURN static void Usage(const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  UsageErrorV(fmt, ap);
+  va_end(ap);
+
+  UsageError("Command: %s", CommandLine().c_str());
+  UsageError("Usage: profman [options]...");
+  UsageError("");
+  UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
+  UsageError("      Can be specified multiple time, in which case the data from the different");
+  UsageError("      profiles will be aggregated.");
+  UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file=<filename>: specify a reference profile.");
+  UsageError("      The data in this file will be compared with the data obtained by merging");
+  UsageError("      all the files specified with --profile-file or --profile-file-fd.");
+  UsageError("      If the exit code is EXIT_COMPILE then all --profile-file will be merged into");
+  UsageError("      --reference-profile-file. ");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used together with");
+  UsageError("      --reference-profile-file.");
+  UsageError("");
+
+  exit(EXIT_FAILURE);
+}
+
+class ProfMan FINAL {
+ public:
+  ProfMan() :
+      reference_profile_file_fd_(-1),
+      start_ns_(NanoTime()) {}
+
+  ~ProfMan() {
+    LogCompletionTime();
+  }
+
+  void ParseArgs(int argc, char **argv) {
+    original_argc = argc;
+    original_argv = argv;
+
+    InitLogging(argv);
+
+    // Skip over the command name.
+    argv++;
+    argc--;
+
+    if (argc == 0) {
+      Usage("No arguments specified");
+    }
+
+    for (int i = 0; i < argc; ++i) {
+      const StringPiece option(argv[i]);
+      const bool log_options = false;
+      if (log_options) {
+        LOG(INFO) << "patchoat: option[" << i << "]=" << argv[i];
+      }
+      if (option.starts_with("--profile-file=")) {
+        profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file=")) {
+        reference_profile_file_ = option.substr(strlen("--reference-profile-file=")).ToString();
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseUintOption(option, "--reference-profile-file-fd", &reference_profile_file_fd_, Usage);
+      } else {
+        Usage("Unknown argument %s", option.data());
+      }
+    }
+
+    if (profile_files_.empty() && profile_files_fd_.empty()) {
+      Usage("No profile files specified.");
+    }
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
+    if (!reference_profile_file_.empty() && (reference_profile_file_fd_ != -1)) {
+      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
+    }
+    if (reference_profile_file_.empty() && (reference_profile_file_fd_ == -1)) {
+      Usage("Reference profile file not specified");
+    }
+  }
+
+  ProfileAssistant::ProcessingResult ProcessProfiles() {
+    ProfileAssistant::ProcessingResult result;
+    if (profile_files_.empty()) {
+      // The file doesn't need to be flushed here (ProcessProfiles will do it)
+      // so don't check the usage.
+      File file(reference_profile_file_fd_, false);
+      result = ProfileAssistant::ProcessProfiles(profile_files_fd_, reference_profile_file_fd_);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_file_);
+    }
+    return result;
+  }
+
+ private:
+  static void ParseFdForCollection(const StringPiece& option,
+                                   const char* arg_name,
+                                   std::vector<int>* fds) {
+    int fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
+  static void CloseAllFds(const std::vector<int>& fds, const char* descriptor) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      if (close(fds[i]) < 0) {
+        PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+      }
+    }
+  }
+
+  void LogCompletionTime() {
+    LOG(INFO) << "profman took " << PrettyDuration(NanoTime() - start_ns_);
+  }
+
+  std::vector<std::string> profile_files_;
+  std::vector<int> profile_files_fd_;
+  std::string reference_profile_file_;
+  int reference_profile_file_fd_;
+  uint64_t start_ns_;
+};
+
+// See ProfileAssistant::ProcessingResult for return codes.
+static int profman(int argc, char** argv) {
+  ProfMan profman;
+
+  // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
+  profman.ParseArgs(argc, argv);
+
+  // Process profile information and assess if we need to do a profile guided compilation.
+  // This operation involves I/O.
+  return profman.ProcessProfiles();
+}
+
+}  // namespace art
+
+int main(int argc, char **argv) {
+  return art::profman(argc, argv);
+}
+
diff --git a/runtime/Android.mk b/runtime/Android.mk
index e9f7add..947de8a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -238,6 +238,7 @@
 # (empty) body is called.
 JIT_DEBUG_REGISTER_CODE_LDFLAGS := -Wl,--keep-unique,__jit_debug_register_code
 LIBART_TARGET_LDFLAGS_arm    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_arm64  := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 LIBART_TARGET_LDFLAGS_x86    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 LIBART_TARGET_LDFLAGS_x86_64 := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
 JIT_DEBUG_REGISTER_CODE_LDFLAGS :=
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 078a978..f3e8d6b 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -132,6 +132,11 @@
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
+  // Returns true if this method might be copied from another class.
+  bool MightBeCopied() {
+    return IsMiranda() || IsDefault() || IsDefaultConflicting();
+  }
+
   bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index de46b0c..8aaeaac 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -37,6 +37,7 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
+  bool collector;
   bool compiler;
   bool deopt;
   bool gc;
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 82a5f96..6972b3e 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1009,10 +1009,6 @@
     DCHECK(alloc_tracker_lock_ == nullptr);
     alloc_tracker_lock_ = new Mutex("AllocTracker lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kInterpreterStringInitMapLock);
-    DCHECK(interpreter_string_init_map_lock_ == nullptr);
-    interpreter_string_init_map_lock_ = new Mutex("Interpreter String initializer reference map lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
     DCHECK(thread_list_lock_ == nullptr);
     thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index f674a6f..e72f2a2 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -102,7 +102,6 @@
   kMonitorListLock,
   kJniLoadLibraryLock,
   kThreadListLock,
-  kInterpreterStringInitMapLock,
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index 9b56856..bd19d00 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -201,11 +201,12 @@
 template <typename T>
 class ArenaDelete {
   static constexpr uint8_t kMagicFill = 0xCE;
+
  protected:
   // Used for variable sized objects such as RegisterLine.
   ALWAYS_INLINE void ProtectMemory(T* ptr, size_t size) const {
     if (RUNNING_ON_MEMORY_TOOL > 0) {
-      // Writing to the memory will fail if it we already destroyed the pointer with
+      // Writing to the memory will fail ift we already destroyed the pointer with
       // DestroyOnlyDelete since we make it no access.
       memset(ptr, kMagicFill, size);
       MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
@@ -220,8 +221,10 @@
 
  public:
   void operator()(T* ptr) const {
-    ptr->~T();
-    ProtectMemory(ptr, sizeof(T));
+    if (ptr != nullptr) {
+      ptr->~T();
+      ProtectMemory(ptr, sizeof(T));
+    }
   }
 };
 
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 814cbd0..0e8031f 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -83,7 +83,7 @@
 }
 
 bool ScopedFlock::Init(File* file, std::string* error_msg) {
-  file_.reset(new File(dup(file->Fd()), true));
+  file_.reset(new File(dup(file->Fd()), file->GetPath(), file->CheckUsage(), file->ReadOnlyMode()));
   if (file_->Fd() == -1) {
     file_.reset();
     *error_msg = StringPrintf("Failed to duplicate open file '%s': %s",
@@ -114,7 +114,13 @@
   if (file_.get() != nullptr) {
     int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
     CHECK_EQ(0, flock_result);
-    if (file_->FlushCloseOrErase() != 0) {
+    int close_result = -1;
+    if (file_->ReadOnlyMode()) {
+      close_result = file_->Close();
+    } else {
+      close_result = file_->FlushCloseOrErase();
+    }
+    if (close_result != 0) {
       PLOG(WARNING) << "Could not close scoped file lock file.";
     }
   }
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index e17bebb..4672948 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -35,18 +35,22 @@
 
 namespace unix_file {
 
-FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
+FdFile::FdFile()
+    : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true), read_only_mode_(false) {
 }
 
 FdFile::FdFile(int fd, bool check_usage)
     : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
-      fd_(fd), auto_close_(true) {
+      fd_(fd), auto_close_(true), read_only_mode_(false) {
 }
 
 FdFile::FdFile(int fd, const std::string& path, bool check_usage)
+    : FdFile(fd, path, check_usage, false) {
+}
+
+FdFile::FdFile(int fd, const std::string& path, bool check_usage, bool read_only_mode)
     : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
-      fd_(fd), file_path_(path), auto_close_(true) {
-  CHECK_NE(0U, path.size());
+      fd_(fd), file_path_(path), auto_close_(true), read_only_mode_(read_only_mode) {
 }
 
 FdFile::~FdFile() {
@@ -99,6 +103,7 @@
 
 bool FdFile::Open(const std::string& path, int flags, mode_t mode) {
   CHECK_EQ(fd_, -1) << path;
+  read_only_mode_ = (flags & O_RDONLY) != 0;
   fd_ = TEMP_FAILURE_RETRY(open(path.c_str(), flags, mode));
   if (fd_ == -1) {
     return false;
@@ -136,6 +141,7 @@
 }
 
 int FdFile::Flush() {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(fdatasync(fd_));
 #else
@@ -155,6 +161,7 @@
 }
 
 int FdFile::SetLength(int64_t new_length) {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(ftruncate64(fd_, new_length));
 #else
@@ -171,6 +178,7 @@
 }
 
 int64_t FdFile::Write(const char* buf, int64_t byte_count, int64_t offset) {
+  DCHECK(!read_only_mode_);
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(pwrite64(fd_, buf, byte_count, offset));
 #else
@@ -184,6 +192,14 @@
   return fd_;
 }
 
+bool FdFile::ReadOnlyMode() const {
+  return read_only_mode_;
+}
+
+bool FdFile::CheckUsage() const {
+  return guard_state_ != GuardState::kNoCheck;
+}
+
 bool FdFile::IsOpened() const {
   return fd_ >= 0;
 }
@@ -219,6 +235,7 @@
 }
 
 bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
+  DCHECK(!read_only_mode_);
   const char* ptr = static_cast<const char*>(buffer);
   moveTo(GuardState::kBase, GuardState::kClosed, "Writing into closed file.");
   while (byte_count > 0) {
@@ -233,6 +250,7 @@
 }
 
 bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
+  DCHECK(!read_only_mode_);
   off_t off = static_cast<off_t>(offset);
   off_t sz = static_cast<off_t>(size);
   if (offset < 0 || static_cast<int64_t>(off) != offset ||
@@ -279,12 +297,14 @@
 }
 
 void FdFile::Erase() {
+  DCHECK(!read_only_mode_);
   TEMP_FAILURE_RETRY(SetLength(0));
   TEMP_FAILURE_RETRY(Flush());
   TEMP_FAILURE_RETRY(Close());
 }
 
 int FdFile::FlushCloseOrErase() {
+  DCHECK(!read_only_mode_);
   int flush_result = TEMP_FAILURE_RETRY(Flush());
   if (flush_result != 0) {
     LOG(::art::ERROR) << "CloseOrErase failed while flushing a file.";
@@ -301,6 +321,7 @@
 }
 
 int FdFile::FlushClose() {
+  DCHECK(!read_only_mode_);
   int flush_result = TEMP_FAILURE_RETRY(Flush());
   if (flush_result != 0) {
     LOG(::art::ERROR) << "FlushClose failed while flushing a file.";
@@ -317,6 +338,7 @@
 }
 
 bool FdFile::ClearContent() {
+  DCHECK(!read_only_mode_);
   if (SetLength(0) < 0) {
     PLOG(art::ERROR) << "Failed to reset the length";
     return false;
@@ -325,6 +347,7 @@
 }
 
 bool FdFile::ResetOffset() {
+  DCHECK(!read_only_mode_);
   off_t rc =  TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
   if (rc == static_cast<off_t>(-1)) {
     PLOG(art::ERROR) << "Failed to reset the offset";
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 1e2d8af..8040afe 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -37,6 +37,7 @@
   // file descriptor. (Use DisableAutoClose to retain ownership.)
   FdFile(int fd, bool checkUsage);
   FdFile(int fd, const std::string& path, bool checkUsage);
+  FdFile(int fd, const std::string& path, bool checkUsage, bool read_only_mode);
 
   // Destroys an FdFile, closing the file descriptor if Close hasn't already
   // been called. (If you care about the return value of Close, call it
@@ -68,6 +69,8 @@
 
   // Bonus API.
   int Fd() const;
+  bool ReadOnlyMode() const;
+  bool CheckUsage() const;
   bool IsOpened() const;
   const std::string& GetPath() const {
     return file_path_;
@@ -119,6 +122,7 @@
   int fd_;
   std::string file_path_;
   bool auto_close_;
+  bool read_only_mode_;
 
   DISALLOW_COPY_AND_ASSIGN(FdFile);
 };
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5278d1b..936c988 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -759,7 +759,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
     CHECK(m->GetDeclaringClass() == nullptr) << PrettyMethod(m);
-  } else if (m->IsMiranda()) {
+  } else if (m->MightBeCopied()) {
     CHECK(m->GetDeclaringClass() != nullptr) << PrettyMethod(m);
   } else if (expected_class != nullptr) {
     CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
@@ -1137,18 +1137,18 @@
 
   virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
     GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(sizeof(void*));
-    const bool is_miranda = method->IsMiranda();
+    const bool maybe_copied = method->MightBeCopied();
     if (resolved_types != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
             reinterpret_cast<const uint8_t*>(resolved_types) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_types << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
                                          sizeof(void*));
@@ -1157,15 +1157,15 @@
     ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(sizeof(void*));
     if (resolved_methods != nullptr) {
       bool in_image_space = false;
-      if (kIsDebugBuild || is_miranda) {
+      if (kIsDebugBuild || maybe_copied) {
         in_image_space = header_.GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
               reinterpret_cast<const uint8_t*>(resolved_methods) - header_.GetImageBegin());
       }
       // Must be in image space for non-miranda method.
-      DCHECK(is_miranda || in_image_space)
+      DCHECK(maybe_copied || in_image_space)
           << resolved_methods << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
-      if (!is_miranda || in_image_space) {
+      if (!maybe_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
                                            sizeof(void*));
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3a0f3e5..4d528ed 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -263,7 +263,7 @@
     for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.IsMiranda() || method.IsDefault() || method.IsDefaultConflicting());
+      EXPECT_TRUE(method.MightBeCopied());
       EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
           << "declaring class: " << PrettyClass(method.GetDeclaringClass());
       EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8b125dd..2c487fe 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -58,10 +58,16 @@
       page_release_mode_(page_release_mode),
       page_release_size_threshold_(page_release_size_threshold),
       is_running_on_memory_tool_(running_on_memory_tool) {
+  DCHECK_ALIGNED(base, kPageSize);
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
   CHECK_ALIGNED(page_release_size_threshold_, kPageSize);
+  // Zero the memory explicitly (don't rely on that the mem map is zero-initialized).
+  if (!kMadviseZeroes) {
+    memset(base_, 0, max_capacity);
+  }
+  CHECK_EQ(madvise(base_, max_capacity, MADV_DONTNEED), 0);
   if (!initialized_) {
     Initialize();
   }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a472a8b..b12cb5b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -192,6 +192,7 @@
         Verify();
       }
       DCHECK(slot != nullptr);
+      DCHECK(slot->Next() == nullptr);
       Slot** headp = reinterpret_cast<Slot**>(&head_);
       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
       Slot* old_head = *headp;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8e1b7f4..d393f0b 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1622,7 +1622,9 @@
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
-  to_ref->VisitReferences(visitor, visitor);
+  // Disable the read barrier for a performance reason.
+  to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
 }
 
 // Process a field.
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index b60426d..c9ac435 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -66,6 +66,10 @@
     return end_;
   }
 
+  size_t Size() const {
+    return size_;
+  }
+
  private:
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
index 8f9a9e2..26da4ca 100644
--- a/runtime/gc/collector/immune_spaces.cc
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -18,6 +18,7 @@
 
 #include "gc/space/space-inl.h"
 #include "mirror/object.h"
+#include "oat_file.h"
 
 namespace art {
 namespace gc {
@@ -45,11 +46,16 @@
       space::ImageSpace* image_space = space->AsImageSpace();
       // Update the end to include the other non-heap sections.
       space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
-      uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin());
-      uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd());
-      if (space_end == oat_begin) {
-        DCHECK_GE(oat_end, oat_begin);
-        space_end = oat_end;
+      // For the app image case, GetOatFileBegin is where the oat file was mapped during image
+      // creation, the actual oat file could be somewhere else.
+      const OatFile* const image_oat_file = image_space->GetOatFile();
+      if (image_oat_file != nullptr) {
+        uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_oat_file->Begin());
+        uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_oat_file->End());
+        if (space_end == oat_begin) {
+          DCHECK_GE(oat_end, oat_begin);
+          space_end = oat_end;
+        }
       }
     }
     if (cur_begin == 0u) {
@@ -71,6 +77,8 @@
   }
   largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
   largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+  VLOG(collector) << "Immune region " << largest_immune_region_.Begin() << "-"
+                  << largest_immune_region_.End();
 }
 
 void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index ea290dd..56838f5 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -72,17 +72,31 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
 }
 
+class DummyOatFile : public OatFile {
+ public:
+  DummyOatFile(uint8_t* begin, uint8_t* end) : OatFile("Location", /*is_executable*/ false) {
+    begin_ = begin;
+    end_ = end;
+  }
+};
+
 class DummyImageSpace : public space::ImageSpace {
  public:
-  DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap)
+  DummyImageSpace(MemMap* map,
+                  accounting::ContinuousSpaceBitmap* live_bitmap,
+                  std::unique_ptr<DummyOatFile>&& oat_file)
       : ImageSpace("DummyImageSpace",
                    /*image_location*/"",
                    map,
                    live_bitmap,
-                   map->End()) {}
+                   map->End()) {
+    oat_file_ = std::move(oat_file);
+    oat_file_non_owned_ = oat_file_.get();
+  }
 
-  // OatSize is how large the oat file is after the image.
-  static DummyImageSpace* Create(size_t size, size_t oat_size) {
+  // Size is the size of the image space, oat offset is where the oat file is located
+  // after the end of image space. oat_size is the size of the oat file.
+  static DummyImageSpace* Create(size_t size, size_t oat_offset, size_t oat_size) {
     std::string error_str;
     std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
                                                      nullptr,
@@ -100,6 +114,9 @@
     if (live_bitmap == nullptr) {
       return nullptr;
     }
+    // The actual mapped oat file may not be directly after the image for the app image case.
+    std::unique_ptr<DummyOatFile> oat_file(new DummyOatFile(map->End() + oat_offset,
+                                                            map->End() + oat_offset + oat_size));
     // Create image header.
     ImageSection sections[ImageHeader::kSectionCount];
     new (map->Begin()) ImageHeader(
@@ -108,6 +125,7 @@
         sections,
         /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
         /*oat_checksum*/0u,
+        // The oat file data in the header is always right after the image space.
         /*oat_file_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
@@ -121,7 +139,7 @@
         /*is_pic*/false,
         ImageHeader::kStorageModeUncompressed,
         /*storage_size*/0u);
-    return new DummyImageSpace(map.release(), live_bitmap.release());
+    return new DummyImageSpace(map.release(), live_bitmap.release(), std::move(oat_file));
   }
 };
 
@@ -129,7 +147,9 @@
   ImmuneSpaces spaces;
   constexpr size_t image_size = 123 * kPageSize;
   constexpr size_t image_oat_size = 321 * kPageSize;
-  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size));
+  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size,
+                                                                       0,
+                                                                       image_oat_size));
   ASSERT_TRUE(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
   EXPECT_EQ(image_header.GetImageSize(), image_size);
@@ -150,6 +170,18 @@
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
             image_space->Begin());
   EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+  // Check that appending with a gap between the map does not include the oat file.
+  image_space.reset(DummyImageSpace::Create(image_size, kPageSize, image_oat_size));
+  spaces.Reset();
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+  }
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  // Size should be equal, we should not add the oat file since it is not adjacent to the image
+  // space.
+  EXPECT_EQ(spaces.GetLargestImmuneRegion().Size(), image_size);
 }
 
 }  // namespace collector
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 0c06c38..894ce9a 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -676,13 +676,17 @@
         dest_(dest),
         length_(length) {}
 
-  bool ContainsSource(uintptr_t address) const {
+  bool InSource(uintptr_t address) const {
     return address - source_ < length_;
   }
 
+  bool InDest(uintptr_t address) const {
+    return address - dest_ < length_;
+  }
+
   // Translate a source address to the destination space.
   uintptr_t ToDest(uintptr_t address) const {
-    DCHECK(ContainsSource(address));
+    DCHECK(InSource(address));
     return address + Delta();
   }
 
@@ -724,24 +728,28 @@
   template <typename T>
   ALWAYS_INLINE T* ForwardObject(T* src) const {
     const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_image_.ContainsSource(uint_src)) {
+    if (boot_image_.InSource(uint_src)) {
       return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
     }
-    if (app_image_.ContainsSource(uint_src)) {
+    if (app_image_.InSource(uint_src)) {
       return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
     }
+    // Since we are fixing up the app image, there should only be pointers to the app image and
+    // boot image.
+    DCHECK(src == nullptr) << reinterpret_cast<const void*>(src);
     return src;
   }
 
   // Return the relocated address of a code pointer (contained by an oat file).
   ALWAYS_INLINE const void* ForwardCode(const void* src) const {
     const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_oat_.ContainsSource(uint_src)) {
+    if (boot_oat_.InSource(uint_src)) {
      return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
     }
-    if (app_oat_.ContainsSource(uint_src)) {
+    if (app_oat_.InSource(uint_src)) {
       return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
     }
+    DCHECK(src == nullptr) << src;
     return src;
   }
 
@@ -766,6 +774,11 @@
   template<typename... Args>
   explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
 
+  // Must be called on pointers that already have been relocated to the destination relocation.
+  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+  }
+
   template <typename T>
   T* operator()(T* obj) const {
     return ForwardObject(obj);
@@ -816,7 +829,10 @@
 class FixupObjectVisitor : public FixupVisitor {
  public:
   template<typename... Args>
-  explicit FixupObjectVisitor(Args... args) : FixupVisitor(args...) {}
+  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* pointer_array_visited,
+                              Args... args)
+      : FixupVisitor(args...),
+        pointer_array_visited_(pointer_array_visited) {}
 
   // Fix up separately since we also need to fix up method entrypoints.
   ALWAYS_INLINE void VisitRootIfNonNull(
@@ -841,6 +857,19 @@
     }
   }
 
+  // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
+  // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
+  template <typename Visitor>
+  void VisitPointerArray(mirror::PointerArray* array, const Visitor& visitor) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    if (array != nullptr &&
+        visitor.IsInAppImage(array) &&
+        !pointer_array_visited_->Test(array)) {
+      array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, sizeof(void*), visitor);
+      pointer_array_visited_->Set(array);
+    }
+  }
+
   // java.lang.ref.Reference visitor.
   void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
@@ -859,11 +888,9 @@
       mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
       klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, sizeof(void*), visitor);
-      // Deal with the arrays.
-      mirror::PointerArray* vtable = klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
-      if (vtable != nullptr) {
-        vtable->Fixup<kVerifyNone, kWithoutReadBarrier>(vtable, sizeof(void*), visitor);
-      }
+      // Deal with the pointer arrays. Use the helper function since multiple classes can reference
+      // the same arrays.
+      VisitPointerArray(klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(), visitor);
       mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
       if (iftable != nullptr) {
         for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
@@ -871,12 +898,15 @@
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
             DCHECK(methods != nullptr);
-            methods->Fixup<kVerifyNone, kWithoutReadBarrier>(methods, sizeof(void*), visitor);
+            VisitPointerArray(methods, visitor);
           }
         }
       }
     }
   }
+
+ private:
+  gc::accounting::ContinuousSpaceBitmap* const pointer_array_visited_;
 };
 
 class ForwardObjectAdapter {
@@ -1010,9 +1040,18 @@
   const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
   uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
   uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
-  // Two pass approach, fix up all classes first, then fix up non class-objects.
-  FixupObjectVisitor fixup_object_visitor(boot_image, boot_oat, app_image, app_oat);
   if (fixup_image) {
+    // Two pass approach, fix up all classes first, then fix up non class-objects.
+    // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
+    std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
+        gc::accounting::ContinuousSpaceBitmap::Create("Pointer array bitmap",
+                                                      target_base,
+                                                      image_header.GetImageSize()));
+    FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
+                                            boot_image,
+                                            boot_oat,
+                                            app_image,
+                                            app_oat);
     TimingLogger::ScopedTiming timing("Fixup classes", &logger);
     // Fixup class only touches app image classes, don't need the mutator lock since the space is
     // not yet visible to the GC.
@@ -1025,7 +1064,7 @@
     bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
     FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
     // Fixup image roots.
-    CHECK(app_image.ContainsSource(reinterpret_cast<uintptr_t>(
+    CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
         image_header.GetImageRoots<kWithoutReadBarrier>())));
     image_header.RelocateImageObjects(app_image.Delta());
     CHECK_EQ(image_header.GetImageBegin(), target_base);
diff --git a/runtime/gc/space/memory_tool_malloc_space-inl.h b/runtime/gc/space/memory_tool_malloc_space-inl.h
index ea8b8aa..6cb246553 100644
--- a/runtime/gc/space/memory_tool_malloc_space-inl.h
+++ b/runtime/gc/space/memory_tool_malloc_space-inl.h
@@ -240,9 +240,9 @@
                     kAdjustForRedzoneInAllocSize,
                     kUseObjSizeForUsable>::MemoryToolMallocSpace(
     MemMap* mem_map, size_t initial_size, Params... params) : S(mem_map, initial_size, params...) {
-  MEMORY_TOOL_MAKE_DEFINED(mem_map->Begin(), initial_size);
-  MEMORY_TOOL_MAKE_UNDEFINED(mem_map->Begin() + initial_size,
-                     mem_map->Size() - initial_size);
+  // Don't want to change the valgrind states of the mem map here as the allocator is already
+  // initialized at this point and that may interfere with what the allocator does internally. Note
+  // that the tail beyond the initial size is mprotected.
 }
 
 template <typename S,
diff --git a/runtime/image.h b/runtime/image.h
index c449e43..146ee00 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -143,6 +143,8 @@
     oat_checksum_ = oat_checksum;
   }
 
+  // The location that the oat file was expected to be when the image was created. The actual
+  // oat file may be at a different location for application images.
   uint8_t* GetOatFileBegin() const {
     return reinterpret_cast<uint8_t*>(oat_file_begin_);
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 56aeefc..e3cbf53 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -290,6 +290,14 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
+  }
+
+  // Any instrumentation *other* than what is needed for Jit profiling active?
+  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 4fd3c78..a595d33 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -320,12 +320,13 @@
         // No Mterp variant - just use the switch interpreter.
         return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
                                               false);
+      } else if (UNLIKELY(!Runtime::Current()->IsStarted())) {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       } else {
-        const instrumentation::Instrumentation* const instrumentation =
-            Runtime::Current()->GetInstrumentation();
         while (true) {
-          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
-            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
+          // Mterp does not support all instrumentation/debugging.
+          if (MterpShouldSwitchInterpreters()) {
 #if !defined(__clang__)
             return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
 #else
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index cbaa817..3453abc 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -733,39 +733,21 @@
   }
 
   if (string_init && !self->IsExceptionPending()) {
-    // Set the new string result of the StringFactory.
-    shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
-    // Overwrite all potential copies of the original result of the new-instance of string with the
-    // new result of the StringFactory. Use the verifier to find this set of registers.
-    ArtMethod* method = shadow_frame.GetMethod();
-    MethodReference method_ref = method->ToMethodReference();
-    SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr = nullptr;
-    MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap();
-    {
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.find(method_ref);
-      if (it != method_to_string_init_map.end()) {
-        string_init_map_ptr = &it->second;
-      }
-    }
-    if (string_init_map_ptr == nullptr) {
-      SafeMap<uint32_t, std::set<uint32_t>> string_init_map =
-          verifier::MethodVerifier::FindStringInitMap(method);
-      MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.lower_bound(method_ref);
-      if (it == method_to_string_init_map.end() ||
-          method_to_string_init_map.key_comp()(method_ref, it->first)) {
-        it = method_to_string_init_map.PutBefore(it, method_ref, std::move(string_init_map));
-      }
-      string_init_map_ptr = &it->second;
-    }
-    if (string_init_map_ptr->size() != 0) {
-      uint32_t dex_pc = shadow_frame.GetDexPC();
-      auto map_it = string_init_map_ptr->find(dex_pc);
-      if (map_it != string_init_map_ptr->end()) {
-        const std::set<uint32_t>& reg_set = map_it->second;
-        for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
-          shadow_frame.SetVRegReference(*set_it, result->GetL());
+    mirror::Object* existing = shadow_frame.GetVRegReference(string_init_vreg_this);
+    if (existing == nullptr) {
+      // If it's null, we come from compiled code that was deoptimized. Nothing to do,
+      // as the compiler verified there was no alias.
+      // Set the new string result of the StringFactory.
+      shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
+    } else {
+      // Replace the fake string that was allocated with the StringFactory result.
+      for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
+        if (shadow_frame.GetVRegReference(i) == existing) {
+          DCHECK_EQ(shadow_frame.GetVRegReference(i),
+                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
+          shadow_frame.SetVRegReference(i, result->GetL());
+          DCHECK_EQ(shadow_frame.GetVRegReference(i),
+                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
         }
       }
     }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 949112d..19d971e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -948,11 +948,15 @@
   __attribute__((cold))
   SHARED_REQUIRES(Locks::mutator_lock_);
 
+static inline bool TraceExecutionEnabled() {
+  // Return true if you want TraceExecution invocation before each bytecode execution.
+  return false;
+}
+
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  constexpr bool kTracing = false;
-  if (kTracing) {
+  if (TraceExecutionEnabled()) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 474bc3c..774e167 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -6,17 +6,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -25,10 +37,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    mov${revcmp} rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 1dba856..3456a75 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -12,7 +12,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -103,8 +102,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -116,12 +119,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
 
 /*
  * Bail out to reference interpreter.
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index b2370bf..298af8a 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -85,6 +85,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -109,14 +112,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
index 7575865..e47dd1b 100644
--- a/runtime/interpreter/mterp/arm/invoke.S
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -14,6 +14,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 9b3632a..6861950 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -6,20 +6,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 2231acd..91639ca 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,17 +5,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index 6b72ff5..e730b52 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,21 +10,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 1e3370e..4c369cb 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,9 +17,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -30,8 +39,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 6e9ef55..800804d 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -6,25 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    b${revcmp} .L_${opcode}_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_${opcode}_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    mov${revcmp} rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index ecab2ce..ed850fc 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -6,17 +6,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -25,11 +36,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index b360539..aae78de 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -10,7 +10,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -99,8 +98,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -120,10 +122,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm x2, xINST, 0, 31
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 351a607..7223750 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -87,6 +87,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -114,14 +117,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
index ff1974c..7a32df7 100644
--- a/runtime/interpreter/mterp/arm64/invoke.S
+++ b/runtime/interpreter/mterp/arm64/invoke.S
@@ -9,11 +9,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      $helper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index db98a45..7e2f6a9 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -6,23 +6,20 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index ff66a23..b2b9924 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -5,19 +5,18 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index 8a6980e..b785857 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -10,23 +10,20 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
index 165c730..88533bd 100644
--- a/runtime/interpreter/mterp/arm64/op_iget.S
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
     /*
      * General instance field get.
      *
@@ -12,6 +12,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       $helper
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    $extend
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index f087d23..e8b4f04 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,20 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      $func                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -30,10 +16,18 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index d4856d2..e528d9f 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -6,26 +6,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.${condition} .L_${opcode}_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_${opcode}_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ${condition}    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 0afd276..1a90acd 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,6 +20,8 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
+#include "jit/jit.h"
+#include "debugger.h"
 
 namespace art {
 namespace interpreter {
@@ -45,7 +47,9 @@
 void InitMterpTls(Thread* self) {
   self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
   self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
-  self->SetMterpCurrentIBase(artMterpAsmInstructionStart);
+  self->SetMterpCurrentIBase(TraceExecutionEnabled() ?
+                             artMterpAsmAltInstructionStart :
+                             artMterpAsmInstructionStart);
 }
 
 /*
@@ -139,6 +143,20 @@
   return entries[index];
 }
 
+extern "C" bool MterpShouldSwitchInterpreters()
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  bool unhandled_instrumentation;
+  // TODO: enable for other targets after more extensive testing.
+  if (kRuntimeISA == kArm64) {
+    unhandled_instrumentation = instrumentation->NonJitProfilingActive();
+  } else {
+    unhandled_instrumentation = instrumentation->IsActive();
+  }
+  return unhandled_instrumentation || Dbg::IsDebuggerActive();
+}
+
 
 extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
                                    uint16_t* dex_pc_ptr,  uint16_t inst_data )
@@ -429,6 +447,7 @@
   } else {
     self->AssertNoPendingException();
   }
+  TraceExecution(*shadow_frame, inst, shadow_frame->GetDexPC());
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
@@ -488,6 +507,14 @@
             << self->IsExceptionPending();
 }
 
+extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
+}
+
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
@@ -500,9 +527,10 @@
   }
 }
 
-extern "C" void MterpSuspendCheck(Thread* self)
+extern "C" bool MterpSuspendCheck(Thread* self)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
+  return MterpShouldSwitchInterpreters();
 }
 
 extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
@@ -618,5 +646,15 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  instrumentation->Branch(self, method, dex_pc, offset);
+  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 90d21e9..8d24641 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,6 +30,7 @@
 
 void InitMterpTls(Thread* self);
 void CheckMterpAsmConstants();
+extern "C" bool MterpShouldSwitchInterpreters();
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index ee19559..519f896 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -92,6 +92,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -116,14 +119,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1111,20 +1106,28 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
-       @ If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
@@ -1143,17 +1146,25 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+#if MTERP_PROFILE_BRANCHES
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1176,21 +1187,29 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1211,7 +1230,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1219,9 +1238,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1232,8 +1260,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1255,7 +1284,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1263,9 +1292,18 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
-    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1276,8 +1314,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1495,17 +1534,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bne .L_op_if_eq_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eq_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1514,10 +1565,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movne rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1538,17 +1589,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    beq .L_op_if_ne_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ne_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1557,10 +1620,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    moveq rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1581,17 +1644,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bge .L_op_if_lt_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1600,10 +1675,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movge rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1624,17 +1699,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    blt .L_op_if_ge_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ge_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1643,10 +1730,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movlt rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1667,17 +1754,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    ble .L_op_if_gt_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gt_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1686,10 +1785,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movle rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1710,17 +1809,29 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    bgt .L_op_if_le_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_le_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1729,10 +1840,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movgt rINST, #2              @ rINST<- BYTE branch dist for not-taken
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1753,25 +1864,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bne .L_op_if_eqz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_eqz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movne rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1792,25 +1915,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    beq .L_op_if_nez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_nez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    moveq rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1831,25 +1966,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bge .L_op_if_ltz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_ltz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movge rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1870,25 +2017,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    blt .L_op_if_gez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movlt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1909,25 +2068,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    ble .L_op_if_gtz_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_gtz_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movle rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1948,25 +2119,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
+#if MTERP_PROFILE_BRANCHES
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    bgt .L_op_if_lez_not_taken
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+.L_op_if_lez_not_taken:
+    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3294,6 +3477,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3326,6 +3512,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3358,6 +3547,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3383,6 +3575,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3409,6 +3604,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3453,6 +3651,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3478,6 +3679,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3503,6 +3707,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3528,6 +3735,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3553,6 +3763,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -7284,6 +7497,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -7309,6 +7525,9 @@
     cmp     r0, #0
     beq     MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -12098,7 +12317,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -12189,8 +12407,12 @@
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     rPC, r0, #CODEITEM_INSNS_OFFSET
     add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
-    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cmp     r0, #0
+    bne     MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -12202,12 +12424,31 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
-    EXPORT_PC
-    mov     r0, rSELF
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    blne    MterpSuspendCheck           @ (self)
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
+1:
+    EXPORT_PC
+    mov     r0, rSELF
+    bl      MterpSuspendCheck           @ (self)
+    cmp     r0, #0
+    bne     MterpFallback
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    mov r0, #1                          @ Signal normal return
+    b MterpDone
 
 /*
  * Bail out to reference interpreter.
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index e9d28ab..e4825f0 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -94,6 +94,9 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+#define MTERP_LOGGING 0
+
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -121,14 +124,6 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
- *
- * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
- * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
- * mterp should do so as well.
- */
-#define MTERP_SUSPEND 0
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1087,26 +1082,23 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsl #16          // w0<- AAxx0000
-    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
-    add     w2, w1, w1                  // w2<- byte offset, set flags
-       // If backwards branch refresh rIBASE
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
     lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
-    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1119,22 +1111,21 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_SUSPEND
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1152,26 +1143,23 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
-    FETCH w0, 1                         // w0<- aaaa (lo)
-    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
-    adds    w1, w0, w0                  // w1<- byte offset
+    adds    w1, wINST, wINST            // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1187,20 +1175,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1208,13 +1182,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1231,20 +1213,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_SUSPEND
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
-    mov     w3, wINST, lsr #8           // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
-    GET_VREG w1, w3                     // w1<- vAA
-    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
-    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
-    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1252,13 +1220,21 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
-#endif
 
 
 /* ------------------------------ */
@@ -1396,17 +1372,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.eq .L_op_if_eq_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eq_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1415,11 +1402,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1440,17 +1427,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movne w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ne .L_op_if_ne_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ne_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1459,11 +1457,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1484,17 +1482,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.lt .L_op_if_lt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1503,11 +1512,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1528,17 +1537,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movge w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.ge .L_op_if_ge_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ge_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1547,11 +1567,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1572,17 +1592,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.gt .L_op_if_gt_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gt_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1591,11 +1622,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1616,17 +1647,28 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_SUSPEND
-    mov     w1, wINST, lsr #12          // w1<- B
+#if MTERP_PROFILE_BRANCHES
+    lsr     w1, wINST, #12              // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    movle w1, #2                 // w1<- BYTE branch dist for not-taken
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    b.le .L_op_if_le_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_le_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1635,11 +1677,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes, check sign
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1660,26 +1702,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    moveq w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.eq .L_op_if_eqz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_eqz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, eq    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1700,26 +1753,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movne w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ne .L_op_if_nez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_nez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ne    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1740,26 +1804,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movlt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.lt .L_op_if_ltz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_ltz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, lt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1780,26 +1855,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movge w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.ge .L_op_if_gez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, ge    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1820,26 +1906,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movgt w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.gt .L_op_if_gtz_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_gtz_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, gt    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1860,26 +1957,37 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_SUSPEND
-    mov     w0, wINST, lsr #8           // w0<- AA
+#if MTERP_PROFILE_BRANCHES
+    lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    movle w1, #2                 // w1<- inst branch dist for not-taken
-    adds    w1, w1, w1                  // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    b.le .L_op_if_lez_taken
+    FETCH_ADVANCE_INST 2                // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+.L_op_if_lez_taken:
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    w1, w1, w0, le    // Branch if true
-    adds    w2, w1, w1                  // convert to bytes & set flags
+    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2401,6 +2509,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGet32InstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2457,6 +2566,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetObjInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2488,6 +2598,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetBooleanInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2519,6 +2630,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetByteInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2550,6 +2662,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetCharInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    uxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2581,6 +2694,7 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetShortInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    sxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -3158,11 +3272,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtual
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3190,11 +3305,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuper
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3222,11 +3338,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirect
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3247,11 +3364,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStatic
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3273,11 +3391,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterface
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3320,11 +3439,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3345,11 +3465,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeSuperRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3370,11 +3491,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeDirectRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3395,11 +3517,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeStaticRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -3420,11 +3543,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeInterfaceRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6852,11 +6976,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuick
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -6877,11 +7002,12 @@
     mov     x0, xSELF
     add     x1, xFP, #OFF_FP_SHADOWFRAME
     mov     x2, xPC
-    // and     x3, xINST, 0xFFFF
     mov     x3, xINST
     bl      MterpInvokeVirtualQuickRange
     cbz     w0, MterpException
     FETCH_ADVANCE_INST 3
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
 
@@ -11565,7 +11691,6 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
-#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -11654,8 +11779,11 @@
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
     add     xPC, x0, #CODEITEM_INSNS_OFFSET
     add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback
     /* resume execution at catch block */
+    EXPORT_PC
     FETCH_INST
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
@@ -11675,10 +11803,24 @@
     EXPORT_PC
     mov     x0, xSELF
     bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm x2, xINST, 0, 31
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone
+
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 96229ce..d365a4f 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -189,11 +189,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -201,9 +196,22 @@
 .endm
 
 /*
+ * Refresh handler table.
+ * IBase handles uses the caller save register so we must restore it after each call.
+ * Also it is used as a result of some 64-bit operations (like imul) and we should
+ * restore it in such cases also.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
  * If rSELF is already loaded then we can use it from known reg.
  */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
@@ -771,8 +779,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -790,8 +798,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -809,8 +817,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -828,8 +836,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -851,8 +859,8 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -874,8 +882,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -903,7 +911,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
@@ -943,8 +951,8 @@
     REFRESH_INST 34
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -969,8 +977,8 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -994,7 +1002,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArray)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1019,7 +1027,7 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL(MterpFilledNewArrayRange)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1037,7 +1045,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
@@ -1923,7 +1931,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
@@ -2090,8 +2098,8 @@
     REFRESH_INST 77
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -2221,7 +2229,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGet32InstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2259,7 +2267,7 @@
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2285,7 +2293,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetObjInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2320,7 +2328,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetBooleanInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2355,7 +2363,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetByteInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2390,7 +2398,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetCharInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2425,7 +2433,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artGetShortInstanceFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
@@ -2461,9 +2469,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet32InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2485,9 +2493,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2503,9 +2511,9 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2533,9 +2541,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2564,9 +2572,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet8InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2595,9 +2603,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2626,9 +2634,9 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet16InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2652,7 +2660,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGet32StaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2685,7 +2693,7 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2709,7 +2717,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetObjStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 1
@@ -2741,7 +2749,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetBooleanStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2773,7 +2781,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetByteStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2805,7 +2813,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetCharStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2837,7 +2845,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL(artGetShortStaticFromCode)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if 0
@@ -2869,9 +2877,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet32StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2894,9 +2902,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2912,9 +2920,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -2939,9 +2947,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2967,9 +2975,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet8StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -2995,9 +3003,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3023,9 +3031,9 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet16StaticFromCode)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
@@ -3049,9 +3057,9 @@
     REFRESH_INST 110
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtual)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3082,9 +3090,9 @@
     REFRESH_INST 111
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3115,9 +3123,9 @@
     REFRESH_INST 112
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirect)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3141,9 +3149,9 @@
     REFRESH_INST 113
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStatic)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3168,9 +3176,9 @@
     REFRESH_INST 114
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterface)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 /*
@@ -3215,9 +3223,9 @@
     REFRESH_INST 116
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3241,9 +3249,9 @@
     REFRESH_INST 117
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeSuperRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3267,9 +3275,9 @@
     REFRESH_INST 118
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeDirectRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3293,9 +3301,9 @@
     REFRESH_INST 119
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeStaticRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -3319,9 +3327,9 @@
     REFRESH_INST 120
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeInterfaceRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -4047,10 +4055,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_div_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_div_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4101,10 +4109,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $0xFFFFFF00, %edx              # If both arguments are less
+    testl   $0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .Lop_rem_int_8                   # Do 8-bit divide
-    test    $0xFFFF0000, %edx              # If both arguments are less
+    testl   $0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .Lop_rem_int_16                  # Do 16-bit divide
     cmpl    $-1, %ecx
@@ -4785,9 +4793,9 @@
     sarl    $4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -5514,11 +5522,11 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5721,11 +5729,11 @@
 /* File: x86/op_mul_int_lit8.S */
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -5985,7 +5993,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $0xf,rINSTbl                   # rINST <- A
@@ -6037,9 +6045,9 @@
     REFRESH_INST 232
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 /* ------------------------------ */
@@ -6062,9 +6070,9 @@
     REFRESH_INST 233
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -6088,9 +6096,9 @@
     REFRESH_INST 234
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL(MterpInvokeVirtualQuickRange)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
 
 
@@ -12912,7 +12920,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/bindiv.S b/runtime/interpreter/mterp/x86/bindiv.S
index bb5b319..e87ba45 100644
--- a/runtime/interpreter/mterp/x86/bindiv.S
+++ b/runtime/interpreter/mterp/x86/bindiv.S
@@ -13,10 +13,10 @@
     je      common_errDivideByZero
     movl    %eax, %edx
     orl     %ecx, %edx
-    test    $$0xFFFFFF00, %edx              # If both arguments are less
+    testl   $$0xFFFFFF00, %edx              # If both arguments are less
                                             #   than 8-bit and +ve
     jz      .L${opcode}_8                   # Do 8-bit divide
-    test    $$0xFFFF0000, %edx              # If both arguments are less
+    testl   $$0xFFFF0000, %edx              # If both arguments are less
                                             #   than 16-bit and +ve
     jz      .L${opcode}_16                  # Do 16-bit divide
     cmpl    $$-1, %ecx
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index 385e784..a1532fa 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -114,7 +114,7 @@
     lea     OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpHandleException)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpExceptionReturn
     REFRESH_IBASE
     movl    OFF_FP_CODE_ITEM(rFP), %eax
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 0977b90..3fbbbf9 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -182,11 +182,6 @@
 
 /*
  * Refresh handler table.
- * IBase handles uses the caller save register so we must restore it after each call.
- * Also it is used as a result of some 64-bit operations (like imul) and we should
- * restore it in such cases also.
- *
- * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
  */
 .macro REFRESH_IBASE
     movl    rSELF, rIBASE
@@ -194,9 +189,22 @@
 .endm
 
 /*
+ * Refresh handler table.
+ * IBase handles uses the caller save register so we must restore it after each call.
+ * Also it is used as a result of some 64-bit operations (like imul) and we should
+ * restore it in such cases also.
+ *
+ * TODO: Consider spilling the IBase instead of restoring it from Thread structure.
+ */
+.macro RESTORE_IBASE
+    movl    rSELF, rIBASE
+    movl    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+.endm
+
+/*
  * If rSELF is already loaded then we can use it from known reg.
  */
-.macro REFRESH_IBASE_FROM_SELF _reg
+.macro RESTORE_IBASE_FROM_SELF _reg
     movl    THREAD_CURRENT_IBASE_OFFSET(\_reg), rIBASE
 .endm
 
diff --git a/runtime/interpreter/mterp/x86/invoke.S b/runtime/interpreter/mterp/x86/invoke.S
index 054fbfd..bbd88cf 100644
--- a/runtime/interpreter/mterp/x86/invoke.S
+++ b/runtime/interpreter/mterp/x86/invoke.S
@@ -14,7 +14,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG3(%esp)
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_aget_object.S b/runtime/interpreter/mterp/x86/op_aget_object.S
index cbfb50c..35ec053 100644
--- a/runtime/interpreter/mterp/x86/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86/op_aget_object.S
@@ -13,7 +13,7 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINST
diff --git a/runtime/interpreter/mterp/x86/op_aput_object.S b/runtime/interpreter/mterp/x86/op_aput_object.S
index 9cfc221..980b26a 100644
--- a/runtime/interpreter/mterp/x86/op_aput_object.S
+++ b/runtime/interpreter/mterp/x86/op_aput_object.S
@@ -9,7 +9,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpAputObject)         # (array, index)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_check_cast.S b/runtime/interpreter/mterp/x86/op_check_cast.S
index ae2ff9e..d090aa3 100644
--- a/runtime/interpreter/mterp/x86/op_check_cast.S
+++ b/runtime/interpreter/mterp/x86/op_check_cast.S
@@ -12,7 +12,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpCheckCast)          # (index, &obj, method, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_class.S b/runtime/interpreter/mterp/x86/op_const_class.S
index 343e110..60be789 100644
--- a/runtime/interpreter/mterp/x86/op_const_class.S
+++ b/runtime/interpreter/mterp/x86/op_const_class.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstClass)         # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string.S b/runtime/interpreter/mterp/x86/op_const_string.S
index bbac69c..ff93b23 100644
--- a/runtime/interpreter/mterp/x86/op_const_string.S
+++ b/runtime/interpreter/mterp/x86/op_const_string.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
index 4236807..e7f952a 100644
--- a/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/x86/op_const_string_jumbo.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpConstString)        # (index, tgt_reg, shadow_frame, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_fill_array_data.S b/runtime/interpreter/mterp/x86/op_fill_array_data.S
index 004aed9..5855284 100644
--- a/runtime/interpreter/mterp/x86/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86/op_fill_array_data.S
@@ -7,6 +7,6 @@
     movl    %ecx, OUT_ARG1(%esp)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_filled_new_array.S b/runtime/interpreter/mterp/x86/op_filled_new_array.S
index a2bac29..35b2fe8 100644
--- a/runtime/interpreter/mterp/x86/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_filled_new_array.S
@@ -15,6 +15,6 @@
     movl    %ecx, OUT_ARG2(%esp)
     call    SYMBOL($helper)
     REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
index 9932610..e3304ba 100644
--- a/runtime/interpreter/mterp/x86/op_iget.S
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -17,7 +17,7 @@
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_object_quick.S b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
index fe16694..b1551a0 100644
--- a/runtime/interpreter/mterp/x86/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iget_object_quick.S
@@ -9,7 +9,7 @@
     EXPORT_PC
     call    SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
     andb    $$0xf,rINSTbl                   # rINST <- A
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
index 92126b4..a5d7e69 100644
--- a/runtime/interpreter/mterp/x86/op_iget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -21,5 +21,5 @@
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG %eax, rINST
     SET_VREG_HIGH %edx, rINST
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_instance_of.S b/runtime/interpreter/mterp/x86/op_instance_of.S
index fd5bf44..e6fe5b2 100644
--- a/runtime/interpreter/mterp/x86/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86/op_instance_of.S
@@ -18,7 +18,7 @@
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
diff --git a/runtime/interpreter/mterp/x86/op_iput.S b/runtime/interpreter/mterp/x86/op_iput.S
index 13cfe5c..c847e2d 100644
--- a/runtime/interpreter/mterp/x86/op_iput.S
+++ b/runtime/interpreter/mterp/x86/op_iput.S
@@ -19,7 +19,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL($handler)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object.S b/runtime/interpreter/mterp/x86/op_iput_object.S
index f63075c..e013697 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG3(%esp)
     call    SYMBOL(MterpIputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_object_quick.S b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
index d54b1b7..cb77929 100644
--- a/runtime/interpreter/mterp/x86/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/x86/op_iput_object_quick.S
@@ -5,7 +5,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpIputObjectQuick)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_iput_wide.S b/runtime/interpreter/mterp/x86/op_iput_wide.S
index 573e14d..122eecf 100644
--- a/runtime/interpreter/mterp/x86/op_iput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iput_wide.S
@@ -13,7 +13,7 @@
     movl    OFF_FP_METHOD(rFP), %eax
     movl    %eax, OUT_ARG3(%esp)            # referrer
     call    SYMBOL(artSet64InstanceFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpPossibleException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_monitor_enter.S b/runtime/interpreter/mterp/x86/op_monitor_enter.S
index 9e885bd..b35c684 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_enter.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_enter.S
@@ -8,7 +8,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artLockObjectFromCode)   # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_monitor_exit.S b/runtime/interpreter/mterp/x86/op_monitor_exit.S
index 0904800..2d17d5e 100644
--- a/runtime/interpreter/mterp/x86/op_monitor_exit.S
+++ b/runtime/interpreter/mterp/x86/op_monitor_exit.S
@@ -12,7 +12,7 @@
     movl    rSELF, %eax
     movl    %eax, OUT_ARG1(%esp)
     call    SYMBOL(artUnlockObjectFromCode) # (object, self)
-    REFRESH_IBASE
-    testl   %eax, %eax
+    RESTORE_IBASE
+    testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
index f92a28e..da699ae 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_2addr.S
@@ -3,8 +3,8 @@
     sarl    $$4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $$0xf, %cl                      # ecx <- A
-    mov     rIBASE, LOCAL0(%esp)
+    movl    rIBASE, rINST
     imull   (rFP,%ecx,4), %eax              # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movl    rINST, rIBASE
     SET_VREG %eax, %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
index 31ab613..056f491 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit16.S
@@ -3,10 +3,10 @@
     movzbl  rINSTbl, %eax                   # eax <- 000000BA
     sarl    $$4, %eax                       # eax <- B
     GET_VREG %eax, %eax                     # eax <- vB
-    movswl  2(rPC), %ecx                    # ecx <- ssssCCCC
+    movl    rIBASE, %ecx
+    movswl  2(rPC), rIBASE                  # rIBASE <- ssssCCCC
     andb    $$0xf, rINSTbl                  # rINST <- A
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
index 6637aa7..59b3844 100644
--- a/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
+++ b/runtime/interpreter/mterp/x86/op_mul_int_lit8.S
@@ -1,9 +1,9 @@
     /* mul/lit8 vAA, vBB, #+CC */
     movzbl  2(rPC), %eax                    # eax <- BB
-    movsbl  3(rPC), %ecx                    # ecx <- ssssssCC
+    movl    rIBASE, %ecx
     GET_VREG  %eax, %eax                    # eax <- rBB
-    mov     rIBASE, LOCAL0(%esp)
-    imull   %ecx, %eax                      # trashes rIBASE/edx
-    mov     LOCAL0(%esp), rIBASE
+    movsbl  3(rPC), rIBASE                  # rIBASE <- ssssssCC
+    imull   rIBASE, %eax                    # trashes rIBASE/edx
+    movl    %ecx, rIBASE
     SET_VREG %eax, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_array.S b/runtime/interpreter/mterp/x86/op_new_array.S
index 2490477..16226e9 100644
--- a/runtime/interpreter/mterp/x86/op_new_array.S
+++ b/runtime/interpreter/mterp/x86/op_new_array.S
@@ -15,7 +15,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpNewArray)
-    REFRESH_IBASE
-    testl   %eax, %eax                      # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_new_instance.S b/runtime/interpreter/mterp/x86/op_new_instance.S
index 712a5eb..f976acc 100644
--- a/runtime/interpreter/mterp/x86/op_new_instance.S
+++ b/runtime/interpreter/mterp/x86/op_new_instance.S
@@ -10,7 +10,7 @@
     REFRESH_INST ${opnum}
     movl    rINST, OUT_ARG2(%esp)
     call    SYMBOL(MterpNewInstance)
-    REFRESH_IBASE
-    testl   %eax, %eax                 # 0 means an exception is thrown
+    RESTORE_IBASE
+    testb   %al, %al                        # 0 means an exception is thrown
     jz      MterpPossibleException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
index ec96458..0e9a3d8 100644
--- a/runtime/interpreter/mterp/x86/op_sget.S
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -15,7 +15,7 @@
     movl    %ecx, OUT_ARG2(%esp)            # self
     call    SYMBOL($helper)
     movl    rSELF, %ecx
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
index 833f266..2b60303 100644
--- a/runtime/interpreter/mterp/x86/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -17,5 +17,5 @@
     jnz     MterpException
     SET_VREG %eax, rINST                    # fp[A]<- low part
     SET_VREG_HIGH %edx, rINST               # fp[A+1]<- high part
-    REFRESH_IBASE_FROM_SELF %ecx
+    RESTORE_IBASE_FROM_SELF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
index a199281..0b5de09 100644
--- a/runtime/interpreter/mterp/x86/op_sput.S
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -16,7 +16,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL($helper)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_object.S b/runtime/interpreter/mterp/x86/op_sput_object.S
index e3e57fc..0db5177 100644
--- a/runtime/interpreter/mterp/x86/op_sput_object.S
+++ b/runtime/interpreter/mterp/x86/op_sput_object.S
@@ -7,7 +7,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)
     call    SYMBOL(MterpSputObject)
-    testl   %eax, %eax
+    testb   %al, %al
     jz      MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
index 7544838..19cff0d 100644
--- a/runtime/interpreter/mterp/x86/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -14,7 +14,7 @@
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
     call    SYMBOL(artSet64IndirectStaticFromMterp)
-    testl   %eax, %eax
+    testb   %al, %al
     jnz     MterpException
-    REFRESH_IBASE
+    RESTORE_IBASE
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 188deb0..bdc7ee2 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -113,8 +113,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>(
-      dlsym(jit_library_handle_, "jit_load"));
+  jit_load_ = reinterpret_cast<void* (*)(bool*)>(dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't find jit_load entry point";
@@ -141,23 +140,15 @@
     *error_msg = "JIT couldn't find jit_types_loaded entry point";
     return false;
   }
-  CompilerCallbacks* callbacks = nullptr;
   bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols);
+  jit_compiler_handle_ = (jit_load_)(&will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
     return false;
   }
-  if (callbacks == nullptr) {
-    dlclose(jit_library_handle_);
-    *error_msg = "JIT compiler callbacks were not set";
-    jit_compiler_handle_ = nullptr;
-    return false;
-  }
-  compiler_callbacks_ = callbacks;
   generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
@@ -294,6 +285,14 @@
     return false;
   }
 
+  if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
+    // Don't attempt to do an OSR if we are close to the stack limit. Since
+    // the interpreter frames are still on stack, OSR has the potential
+    // to stack overflow even for a simple loop.
+    // b/27094810.
+    return false;
+  }
+
   // Get the actual Java method if this method is from a proxy class. The compiler
   // and the JIT code cache do not expect methods from proxy classes.
   method = method->GetInterfaceMethodIfProxy(sizeof(void*));
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 042da92..109ca3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -32,7 +32,6 @@
 namespace art {
 
 class ArtMethod;
-class CompilerCallbacks;
 struct RuntimeArgumentMap;
 
 namespace jit {
@@ -55,9 +54,6 @@
                                   size_t warmup_threshold,
                                   size_t osr_threshold);
   void CreateThreadPool();
-  CompilerCallbacks* GetCompilerCallbacks() {
-    return compiler_callbacks_;
-  }
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
@@ -108,7 +104,7 @@
   // JIT compiler
   void* jit_library_handle_;
   void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**, bool*);
+  void* (*jit_load_)(bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
   void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
@@ -119,7 +115,6 @@
 
   std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
-  CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
   bool save_profiling_info_;
   bool generate_debug_info_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 9111ddf..71d16e0 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -232,25 +232,20 @@
 void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  const uint8_t* data = method_header->GetNativeGcMap();
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
-  data = method_header->GetMappingTable();
-  if (data != nullptr) {
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
-  }
+
+  FreeData(const_cast<uint8_t*>(method_header->GetNativeGcMap()));
+  FreeData(const_cast<uint8_t*>(method_header->GetMappingTable()));
   // Use the offset directly to prevent sanity check that the method is
   // compiled with optimizing.
   // TODO(ngeoffray): Clean up.
   if (method_header->vmap_table_offset_ != 0) {
-    data = method_header->code_ - method_header->vmap_table_offset_;
-    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
+    FreeData(const_cast<uint8_t*>(data));
   }
-  mspace_free(code_mspace_, reinterpret_cast<uint8_t*>(allocation));
+  FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
@@ -269,11 +264,19 @@
       }
     }
   }
+  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+    if (alloc.ContainsUnsafe(it->first)) {
+      // Note that the code has already been removed in the loop above.
+      it = osr_code_map_.erase(it);
+    } else {
+      ++it;
+    }
+  }
   for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
     ProfilingInfo* info = *it;
     if (alloc.ContainsUnsafe(info->GetMethod())) {
       info->GetMethod()->SetProfilingInfo(nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      FreeData(reinterpret_cast<uint8_t*>(info));
       it = profiling_infos_.erase(it);
     } else {
       ++it;
@@ -299,19 +302,18 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
+  uint8_t* memory = nullptr;
   {
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
     {
       ScopedCodeCacheWrite scc(code_map_.get());
-      uint8_t* result = reinterpret_cast<uint8_t*>(
-          mspace_memalign(code_mspace_, alignment, total_size));
-      if (result == nullptr) {
+      memory = AllocateCode(total_size);
+      if (memory == nullptr) {
         return nullptr;
       }
-      code_ptr = result + header_size;
-      DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+      code_ptr = memory + header_size;
 
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
@@ -368,9 +370,7 @@
 }
 
 size_t JitCodeCache::CodeCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_code_;
 }
 
 size_t JitCodeCache::DataCacheSize() {
@@ -379,9 +379,7 @@
 }
 
 size_t JitCodeCache::DataCacheSizeLocked() {
-  size_t bytes_allocated = 0;
-  mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
-  return bytes_allocated;
+  return used_memory_for_data_;
 }
 
 size_t JitCodeCache::NumberOfCompiledCode() {
@@ -391,7 +389,7 @@
 
 void JitCodeCache::ClearData(Thread* self, void* data) {
   MutexLock mu(self, lock_);
-  mspace_free(data_mspace_, data);
+  FreeData(reinterpret_cast<uint8_t*>(data));
 }
 
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
@@ -402,7 +400,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   if (result == nullptr) {
@@ -411,7 +409,7 @@
     ScopedThreadSuspension sts(self, kSuspended);
     MutexLock mu(self, lock_);
     WaitForPotentialCollectionToComplete(self);
-    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+    result = AllocateData(size);
   }
 
   return result;
@@ -620,12 +618,11 @@
       }
     }
 
-    void* data_mspace = data_mspace_;
     // Free all profiling infos of methods that were not being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
-      [data_mspace] (ProfilingInfo* info) {
+      [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS {
         if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
-          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          FreeData(reinterpret_cast<uint8_t*>(info));
           return true;
         }
         return false;
@@ -710,7 +707,7 @@
     return info;
   }
 
-  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  uint8_t* data = AllocateData(profile_info_size);
   if (data == nullptr) {
     return nullptr;
   }
@@ -782,5 +779,51 @@
   return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
 }
 
+void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
+                                             const OatQuickMethodHeader* header) {
+  if (method->GetEntryPointFromQuickCompiledCode() == header->GetEntryPoint()) {
+    // The entrypoint is the one to invalidate, so we just update
+    // it to the interpreter entry point and clear the counter to get the method
+    // Jitted again.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, GetQuickToInterpreterBridge());
+    method->ClearCounter();
+  } else {
+    MutexLock mu(Thread::Current(), lock_);
+    auto it = osr_code_map_.find(method);
+    if (it != osr_code_map_.end() && OatQuickMethodHeader::FromCodePointer(it->second) == header) {
+      // Remove the OSR method, to avoid using it again.
+      osr_code_map_.erase(it);
+    }
+  }
+}
+
+uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  uint8_t* result = reinterpret_cast<uint8_t*>(
+      mspace_memalign(code_mspace_, alignment, code_size));
+  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+  // Ensure the header ends up at expected instruction alignment.
+  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
+  used_memory_for_code_ += mspace_usable_size(result);
+  return result;
+}
+
+void JitCodeCache::FreeCode(uint8_t* code) {
+  used_memory_for_code_ -= mspace_usable_size(code);
+  mspace_free(code_mspace_, code);
+}
+
+uint8_t* JitCodeCache::AllocateData(size_t data_size) {
+  void* result = mspace_malloc(data_mspace_, data_size);
+  used_memory_for_data_ += mspace_usable_size(result);
+  return reinterpret_cast<uint8_t*>(result);
+}
+
+void JitCodeCache::FreeData(uint8_t* data) {
+  used_memory_for_data_ -= mspace_usable_size(data);
+  mspace_free(data_mspace_, data);
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 048f8d0..ac53532 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -172,6 +172,10 @@
 
   size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
 
+  void InvalidateCompiledCodeFor(ArtMethod* method, const OatQuickMethodHeader* code)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -272,6 +276,17 @@
   // Whether we can do garbage collection.
   const bool garbage_collect_code_;
 
+  // The size in bytes of used memory for the data portion of the code cache.
+  size_t used_memory_for_data_ GUARDED_BY(lock_);
+
+  // The size in bytes of used memory for the code portion of the code cache.
+  size_t used_memory_for_code_ GUARDED_BY(lock_);
+
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
+  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
+  void FreeData(uint8_t* data) REQUIRES(lock_);
+  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
+
   // Number of compilations done throughout the lifetime of the JIT.
   size_t number_of_compilations_ GUARDED_BY(lock_);
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 0aff1f7..747b112 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -125,8 +125,8 @@
  *    app.apk,131232145,11,23,454,54
  *    app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool ProfileCompilationInfo::Save(uint32_t fd) {
-  DCHECK_GE(fd, 0u);
+bool ProfileCompilationInfo::Save(int fd) {
+  DCHECK_GE(fd, 0);
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
@@ -232,8 +232,8 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(uint32_t fd) {
-  DCHECK_GE(fd, 0u);
+bool ProfileCompilationInfo::Load(int fd) {
+  DCHECK_GE(fd, 0);
 
   std::string current_line;
   const int kBufferSize = 1024;
@@ -343,7 +343,7 @@
   return os.str();
 }
 
-bool ProfileCompilationInfo::Equals(ProfileCompilationInfo& other) {
+bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
   return info_.Equals(other.info_);
 }
 
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index c388c4a..edc591c 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -46,11 +46,11 @@
                                 const std::vector<ArtMethod*>& methods);
 
   // Loads profile information from the given file descriptor.
-  bool Load(uint32_t fd);
+  bool Load(int fd);
   // Loads the data from another ProfileCompilationInfo object.
   bool Load(const ProfileCompilationInfo& info);
   // Saves the profile data to the given file descriptor.
-  bool Save(uint32_t fd);
+  bool Save(int fd);
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
 
@@ -65,8 +65,7 @@
                        bool print_full_dex_location = true) const;
 
   // For testing purposes.
-  bool Equals(ProfileCompilationInfo& other);
-  // Exposed for testing purpose.
+  bool Equals(const ProfileCompilationInfo& other);
   static std::string GetProfileDexFileKey(const std::string& dex_location);
 
  private:
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 422832e..3f806d3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -532,8 +532,9 @@
   return GetFieldPtr<LengthPrefixedArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
-  Class* super_class = GetSuperClass();
+  Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
       ? MemberOffset(RoundUp(super_class->GetObjectSize(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index cdc6204..9190e44 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1048,5 +1048,11 @@
   return depth;
 }
 
+uint32_t Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
+  std::string temp;
+  const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
+  return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 388a231..6e3463c 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1006,6 +1006,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference instance field. Other reference instance fields follow.
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceInstanceFieldOffset()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1119,6 +1121,9 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
   }
 
+  uint32_t FindTypeIndexInOtherDexFile(const DexFile& dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Class* GetJavaLangClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(HasJavaLangClass());
     return java_lang_Class_.Read();
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index eb391be4..76a36ac 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -1068,7 +1068,7 @@
       MemberOffset field_offset = kIsStatic
           ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
-          : klass->GetFirstReferenceInstanceFieldOffset();
+          : klass->GetFirstReferenceInstanceFieldOffset<kVerifyFlags, kReadBarrierOption>();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
         // TODO: Do a simpler check?
         if (field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index bcc2d33..910163c 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,6 +40,12 @@
 class OatHeader;
 class OatDexFile;
 
+namespace gc {
+namespace collector {
+class DummyOatFile;
+}  // namespace collector
+}  // namespace gc
+
 class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
@@ -312,6 +318,7 @@
   // elements. std::list<> and std::deque<> satisfy this requirement, std::vector<> doesn't.
   mutable std::list<std::string> string_cache_ GUARDED_BY(secondary_lookup_lock_);
 
+  friend class gc::collector::DummyOatFile;  // For modifying begin_ and end_.
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index f9d916a..d64aa43 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -394,6 +394,7 @@
 // Intended for local changes only.
 static void MaybeOverrideVerbosity() {
   //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
+  //  gLogVerbosity.collector = true;  // TODO: don't check this in!
   //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.deopt = true;  // TODO: don't check this in!
   //  gLogVerbosity.gc = true;  // TODO: don't check this in!
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 786cf06..2dfa860 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -23,6 +23,8 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
@@ -288,13 +290,18 @@
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
-        single_frame_deopt_method_(nullptr) {
+        single_frame_deopt_method_(nullptr),
+        single_frame_deopt_quick_method_header_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
     return single_frame_deopt_method_;
   }
 
+  const OatQuickMethodHeader* GetSingleFrameDeoptQuickMethodHeader() const {
+    return single_frame_deopt_quick_method_header_;
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
@@ -366,6 +373,7 @@
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
+        single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
       return true;
     }
@@ -601,6 +609,7 @@
   const bool single_frame_deopt_;
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
+  const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -629,13 +638,17 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
-  // Compiled code made an explicit deoptimization. Transfer the code
-  // to interpreter and clear the counter to JIT the method again.
+  // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  deopt_method->ClearCounter();
-  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-      deopt_method, GetQuickToInterpreterBridge());
+  if (Runtime::Current()->UseJit()) {
+    Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+        deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
+  } else {
+    // Transfer the code to interpreter.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        deopt_method, GetQuickToInterpreterBridge());
+  }
 
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2aeb792..861bd85 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1887,7 +1887,6 @@
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
-    compiler_callbacks_ = jit_->GetCompilerCallbacks();
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
                                      jit_options_->GetWarmupThreshold(),
                                      jit_options_->GetOsrThreshold());
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1956bae..cbb3e89 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -94,8 +94,6 @@
 class Transaction;
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
-typedef SafeMap<MethodReference, SafeMap<uint32_t, std::set<uint32_t>>,
-    MethodReferenceComparator> MethodRefToStringInitRegMap;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
@@ -574,10 +572,6 @@
     return jit_options_.get();
   }
 
-  MethodRefToStringInitRegMap& GetStringInitMap() {
-    return method_ref_string_init_reg_map_;
-  }
-
   bool IsDebuggable() const;
 
   // Returns the build fingerprint, if set. Otherwise an empty string is returned.
@@ -803,8 +797,6 @@
   // Experimental opcodes should not be used by other production code.
   ExperimentalFlags experimental_flags_;
 
-  MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
-
   // Contains the build fingerprint, if given as a parameter.
   std::string fingerprint_;
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 07f94c0..13564a6 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1392,9 +1392,8 @@
   return filename;
 }
 
-bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
+int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
   const std::string command_line(Join(arg_vector, ' '));
-
   CHECK_GE(arg_vector.size(), 1U) << command_line;
 
   // Convert the args to char pointers.
@@ -1417,7 +1416,6 @@
     setpgid(0, 0);
 
     execv(program, &args[0]);
-
     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
     // _exit to avoid atexit handlers in child.
     _exit(1);
@@ -1425,23 +1423,32 @@
     if (pid == -1) {
       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
                                 command_line.c_str(), strerror(errno));
-      return false;
+      return -1;
     }
 
     // wait for subprocess to finish
-    int status;
+    int status = -1;
     pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
     if (got_pid != pid) {
       *error_msg = StringPrintf("Failed after fork for execv(%s) because waitpid failed: "
                                 "wanted %d, got %d: %s",
                                 command_line.c_str(), pid, got_pid, strerror(errno));
-      return false;
+      return -1;
     }
-    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-      *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
-                                command_line.c_str());
-      return false;
+    if (WIFEXITED(status)) {
+      return WEXITSTATUS(status);
     }
+    return -1;
+  }
+}
+
+bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
+  int status = ExecAndReturnCode(arg_vector, error_msg);
+  if (status != 0) {
+    const std::string command_line(Join(arg_vector, ' '));
+    *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
+                              command_line.c_str());
+    return false;
   }
   return true;
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index c00db11..83ac0b87 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -111,6 +111,11 @@
       : std::abs(value);
 }
 
+template <typename T>
+inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
+  return static_cast<typename std::make_unsigned<T>::type>(x);
+}
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -287,6 +292,7 @@
 
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);
+int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg);
 
 // Returns true if the file exists.
 bool FileExists(const std::string& filename);
@@ -343,7 +349,7 @@
                             UsageFn Usage,
                             bool is_long_option = true) {
   std::string option_prefix = option_name + (is_long_option ? "=" : "");
-  DCHECK(option.starts_with(option_prefix));
+  DCHECK(option.starts_with(option_prefix)) << option << " " << option_prefix;
   const char* value_string = option.substr(option_prefix.size()).data();
   int64_t parsed_integer_value = 0;
   if (!ParseInt(value_string, &parsed_integer_value)) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index a6cf9ea..0c6060e 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -617,23 +617,6 @@
   return GetQuickInvokedMethod(inst, register_line, is_range, false);
 }
 
-SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(ArtMethod* m) {
-  Thread* self = Thread::Current();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
-                          true, true, false, true);
-  // Avoid copying: The map is moved out of the verifier before the verifier is destroyed.
-  return std::move(verifier.FindStringInitMap());
-}
-
-SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
-  Verify();
-  return GetStringInitPcRegMap();
-}
-
 bool MethodVerifier::Verify() {
   // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
   // the name.
@@ -2865,8 +2848,7 @@
          * Replace the uninitialized reference with an initialized one. We need to do this for all
          * registers that have the same object instance in them, not just the "this" register.
          */
-        const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-        work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
+        work_line_->MarkRefsAsInitialized(this, this_type);
       }
       if (return_type == nullptr) {
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor, false);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index b53a45c..6d8e1ab 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -213,9 +213,6 @@
   static ArtMethod* FindInvokedMethodAtDexPc(ArtMethod* m, uint32_t dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static SafeMap<uint32_t, std::set<uint32_t>> FindStringInitMap(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   static void Init() SHARED_REQUIRES(Locks::mutator_lock_);
   static void Shutdown();
 
@@ -294,10 +291,6 @@
   ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
-    return string_init_pc_reg_map_;
-  }
-
   uint32_t GetEncounteredFailureTypes() {
     return encountered_failure_types_;
   }
@@ -875,11 +868,6 @@
 
   friend class art::Thread;
 
-  // Map of dex pcs of invocations of java.lang.String.<init> to the set of other registers that
-  // contain the uninitialized this pointer to that invoke. Will contain no entry if there are
-  // no other registers.
-  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
-
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index bfbb78f..29d87c4 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -204,9 +204,10 @@
 }
 
 inline void RegisterLineArenaDelete::operator()(RegisterLine* ptr) const {
-  const size_t size = ptr != nullptr ? RegisterLine::ComputeSize(ptr->NumRegs()) : 0u;
-  ptr->~RegisterLine();
-  ProtectMemory(ptr, size);
+  if (ptr != nullptr) {
+    ptr->~RegisterLine();
+    ProtectMemory(ptr, RegisterLine::ComputeSize(ptr->NumRegs()));
+  }
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index b7cde99..82c371d 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -91,25 +91,14 @@
   return true;
 }
 
-void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                                         uint32_t this_reg, uint32_t dex_pc) {
+void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type) {
   DCHECK(uninit_type.IsUninitializedTypes());
-  bool is_string = !uninit_type.IsUnresolvedTypes() && uninit_type.GetClass()->IsStringClass();
   const RegType& init_type = verifier->GetRegTypeCache()->FromUninitialized(uninit_type);
   size_t changed = 0;
   for (uint32_t i = 0; i < num_regs_; i++) {
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
-      if (is_string && i != this_reg) {
-        auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
-        if (it != verifier->GetStringInitPcRegMap().end()) {
-          it->second.insert(i);
-        } else {
-          std::set<uint32_t> reg_set = { i };
-          verifier->GetStringInitPcRegMap().Put(dex_pc, reg_set);
-        }
-      }
     }
   }
   // Is this initializing "this"?
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index d508454..15ae202 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -99,11 +99,14 @@
   // available now. An example is sharpening types after a check-cast. Note that when given kKeep,
   // the new_type is dchecked to be a reference type.
   template <LockOp kLockOp>
-  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier, uint32_t vdst,
+  ALWAYS_INLINE bool SetRegisterType(MethodVerifier* verifier,
+                                     uint32_t vdst,
                                      const RegType& new_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetRegisterTypeWide(MethodVerifier* verifier, uint32_t vdst, const RegType& new_type1,
+  bool SetRegisterTypeWide(MethodVerifier* verifier,
+                           uint32_t vdst,
+                           const RegType& new_type1,
                            const RegType& new_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -117,11 +120,14 @@
   // Get the type of register vsrc.
   const RegType& GetRegisterType(MethodVerifier* verifier, uint32_t vsrc) const;
 
-  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
+  ALWAYS_INLINE bool VerifyRegisterType(MethodVerifier* verifier,
+                                        uint32_t vsrc,
                                         const RegType& check_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool VerifyRegisterTypeWide(MethodVerifier* verifier, uint32_t vsrc, const RegType& check_type1,
+  bool VerifyRegisterTypeWide(MethodVerifier* verifier,
+                              uint32_t vsrc,
+                              const RegType& check_type1,
                               const RegType& check_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -155,8 +161,7 @@
    * reference type. This is called when an appropriate constructor is invoked -- all copies of
    * the reference must be marked as initialized.
    */
-  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
-                             uint32_t this_reg, uint32_t dex_pc)
+  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -210,31 +215,42 @@
    * allow_failure will return Conflict() instead of causing a verification failure if there is an
    * error.
    */
-  const RegType& GetInvocationThis(MethodVerifier* verifier, const Instruction* inst,
-                                   bool is_range, bool allow_failure = false)
+  const RegType& GetInvocationThis(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   bool is_range,
+                                   bool allow_failure = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
    * Verify types for a simple two-register instruction (e.g. "neg-int").
    * "dst_type" is stored into vA, and "src_type" is verified against vB.
    */
-  void CheckUnaryOp(MethodVerifier* verifier, const Instruction* inst, const RegType& dst_type,
+  void CheckUnaryOp(MethodVerifier* verifier,
+                    const Instruction* inst,
+                    const RegType& dst_type,
                     const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                        const RegType& dst_type1, const RegType& dst_type2,
-                        const RegType& src_type1, const RegType& src_type2)
+  void CheckUnaryOpWide(MethodVerifier* verifier,
+                        const Instruction* inst,
+                        const RegType& dst_type1,
+                        const RegType& dst_type2,
+                        const RegType& src_type1,
+                        const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpToWide(MethodVerifier* verifier, const Instruction* inst,
-                          const RegType& dst_type1, const RegType& dst_type2,
+  void CheckUnaryOpToWide(MethodVerifier* verifier,
+                          const Instruction* inst,
+                          const RegType& dst_type1,
+                          const RegType& dst_type2,
                           const RegType& src_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckUnaryOpFromWide(MethodVerifier* verifier, const Instruction* inst,
+  void CheckUnaryOpFromWide(MethodVerifier* verifier,
+                            const Instruction* inst,
                             const RegType& dst_type,
-                            const RegType& src_type1, const RegType& src_type2)
+                            const RegType& src_type1,
+                            const RegType& src_type2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -242,19 +258,28 @@
    * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
    * against vB/vC.
    */
-  void CheckBinaryOp(MethodVerifier* verifier, const Instruction* inst,
-                     const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
+  void CheckBinaryOp(MethodVerifier* verifier,
+                     const Instruction* inst,
+                     const RegType& dst_type,
+                     const RegType& src_type1,
+                     const RegType& src_type2,
                      bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWide(MethodVerifier* verifier, const Instruction* inst,
-                         const RegType& dst_type1, const RegType& dst_type2,
-                         const RegType& src_type1_1, const RegType& src_type1_2,
-                         const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOpWide(MethodVerifier* verifier,
+                         const Instruction* inst,
+                         const RegType& dst_type1,
+                         const RegType& dst_type2,
+                         const RegType& src_type1_1,
+                         const RegType& src_type1_2,
+                         const RegType& src_type2_1,
+                         const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOpWideShift(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOpWideShift(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& long_lo_type,
+                              const RegType& long_hi_type,
                               const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -262,20 +287,28 @@
    * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
    * are verified against vA/vB, then "dst_type" is stored into vA.
    */
-  void CheckBinaryOp2addr(MethodVerifier* verifier, const Instruction* inst,
+  void CheckBinaryOp2addr(MethodVerifier* verifier,
+                          const Instruction* inst,
                           const RegType& dst_type,
-                          const RegType& src_type1, const RegType& src_type2,
+                          const RegType& src_type1,
+                          const RegType& src_type2,
                           bool check_boolean_op)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWide(MethodVerifier* verifier, const Instruction* inst,
-                              const RegType& dst_type1, const RegType& dst_type2,
-                              const RegType& src_type1_1, const RegType& src_type1_2,
-                              const RegType& src_type2_1, const RegType& src_type2_2)
+  void CheckBinaryOp2addrWide(MethodVerifier* verifier,
+                              const Instruction* inst,
+                              const RegType& dst_type1,
+                              const RegType& dst_type2,
+                              const RegType& src_type1_1,
+                              const RegType& src_type1_2,
+                              const RegType& src_type2_1,
+                              const RegType& src_type2_2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier, const Instruction* inst,
-                                   const RegType& long_lo_type, const RegType& long_hi_type,
+  void CheckBinaryOp2addrWideShift(MethodVerifier* verifier,
+                                   const Instruction* inst,
+                                   const RegType& long_lo_type,
+                                   const RegType& long_hi_type,
                                    const RegType& int_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -285,9 +318,12 @@
    *
    * If "check_boolean_op" is set, we use the constant value in vC.
    */
-  void CheckLiteralOp(MethodVerifier* verifier, const Instruction* inst,
-                      const RegType& dst_type, const RegType& src_type,
-                      bool check_boolean_op, bool is_lit16)
+  void CheckLiteralOp(MethodVerifier* verifier,
+                      const Instruction* inst,
+                      const RegType& dst_type,
+                      const RegType& src_type,
+                      bool check_boolean_op,
+                      bool is_lit16)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
@@ -409,8 +445,6 @@
   void operator()(RegisterLine* ptr) const;
 };
 
-
-
 }  // namespace verifier
 }  // namespace art
 
diff --git a/test/442-checker-constant-folding/smali/TestCmp.smali b/test/442-checker-constant-folding/smali/TestCmp.smali
new file mode 100644
index 0000000..df631bc
--- /dev/null
+++ b/test/442-checker-constant-folding/smali/TestCmp.smali
@@ -0,0 +1,332 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCmp;
+
+.super Ljava/lang/Object;
+
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (before)
+## CHECK-DAG:     <<Const13:j\d+>>  LongConstant 13
+## CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const13>>,<<Const7>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-DAG:                       LongConstant 13
+## CHECK-DAG:                       LongConstant 7
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongConstants()I
+   .registers 5
+   const-wide v1, 13
+   const-wide v3, 7
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const11:f\d+>>  FloatConstant 11
+## CHECK-DAG:     <<Const22:f\d+>>  FloatConstant 22
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const11>>,<<Const22>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 11
+## CHECK-DAG:                       FloatConstant 22
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstants()I
+   .registers 3
+   const v1, 11.f
+   const v2, 22.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (before)
+## CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const33>>,<<Const44>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 33
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstants()I
+   .registers 3
+   const v1, 33.f
+   const v2, 44.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const55:d\d+>>  DoubleConstant 55
+## CHECK-DAG:     <<Const66:d\d+>>  DoubleConstant 66
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const55>>,<<Const66>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 55
+## CHECK-DAG:                       DoubleConstant 66
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstants()I
+   .registers 5
+   const-wide v1, 55.
+   const-wide v3, 66.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (before)
+## CHECK-DAG:     <<Const77:d\d+>>  DoubleConstant 77
+## CHECK-DAG:     <<Const88:d\d+>>  DoubleConstant 88
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const77>>,<<Const88>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 77
+## CHECK-DAG:                       DoubleConstant 88
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstants() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstants()I
+   .registers 5
+   const-wide v1, 77.
+   const-wide v3, 88.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const100:j\d+>> LongConstant 100
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const100>>,<<Const100>>]
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-DAG:                       LongConstant 100
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLongSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLongSameConstant()I
+   .registers 5
+   const-wide v1, 100
+   const-wide v3, 100
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const200:f\d+>> FloatConstant 200
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const200>>,<<Const200>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 200
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatSameConstant()I
+   .registers 3
+   const v1, 200.f
+   const v2, 200.f
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const300:f\d+>> FloatConstant 300
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const300>>,<<Const300>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 300
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatSameConstant()I
+   .registers 3
+   const v1, 300.f
+   const v2, 300.f
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const400:d\d+>> DoubleConstant 400
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const400>>,<<Const400>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 400
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 400.
+   const-wide v3, 400.
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (before)
+## CHECK-DAG:     <<Const500:d\d+>> DoubleConstant 500
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const500>>,<<Const500>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 500
+## CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+## CHECK-DAG:                       Return [<<Const0>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleSameConstant() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleSameConstant()I
+   .registers 5
+   const-wide v1, 500.
+   const-wide v3, 500.
+   cmpl-double v0, v1, v3
+   return v0
+.end method
+
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const44:f\d+>>  FloatConstant 44
+## CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const44>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       FloatConstant 44
+## CHECK-DAG:                       FloatConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtFloatConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtFloatConstantWithNaN()I
+   .registers 3
+   const v1, 44.f
+   const v2, NaNf
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const45:d\d+>>  DoubleConstant 45
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const45>>,<<ConstNan>>] bias:gt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 45
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+## CHECK-DAG:                       Return [<<Const1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpGtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpGtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 45.
+   const-wide v3, NaN
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (before)
+## CHECK-DAG:     <<Const46:d\d+>>  DoubleConstant 46
+## CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+## CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Const46>>,<<ConstNan>>] bias:lt
+## CHECK-DAG:                       Return [<<Cmp>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-DAG:                       DoubleConstant 46
+## CHECK-DAG:                       DoubleConstant nan
+## CHECK-DAG:     <<ConstM1:i\d+>>  IntConstant -1
+## CHECK-DAG:                       Return [<<ConstM1>>]
+
+## CHECK-START: int TestCmp.$opt$CmpLtDoubleConstantWithNaN() constant_folding (after)
+## CHECK-NOT:                       Compare
+
+.method public static $opt$CmpLtDoubleConstantWithNaN()I
+   .registers 5
+   const-wide v1, 46.
+   const-wide v3, NaN
+   cmpl-double v0, v1, v3
+   return v0
+.end method
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 5479818..93fe397 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 public class Main {
 
+  // Workaround for b/18051191.
+  class InnerClass {}
+
   public static void assertFalse(boolean condition) {
     if (condition) {
       throw new Error();
@@ -47,6 +52,68 @@
   }
 
 
+  // Wrappers around methods located in file TestCmp.smali.
+
+  public int smaliCmpLongConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstants() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstants");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpLongSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLongSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleSameConstant() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleSameConstant");
+    return (Integer)m.invoke(null);
+  }
+
+  public int smaliCmpGtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtFloatConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtFloatConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpGtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpGtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+  public int smaliCmpLtDoubleConstantWithNaN() throws Exception {
+    Method m = testCmp.getMethod("$opt$CmpLtDoubleConstantWithNaN");
+    return (Integer)m.invoke(null);
+  }
+
+
   /**
    * Exercise constant folding on negation.
    */
@@ -89,6 +156,44 @@
     return y;
   }
 
+  /// CHECK-START: float Main.FloatNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Neg:f\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:f\d+>> FloatConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: float Main.FloatNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static float FloatNegation() {
+    float x, y;
+    x = 42F;
+    y = -x;
+    return y;
+  }
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (before)
+  /// CHECK-DAG:     <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:     <<Neg:d\d+>>      Neg [<<Const42>>]
+  /// CHECK-DAG:                       Return [<<Neg>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-DAG:     <<ConstN42:d\d+>> DoubleConstant -42
+  /// CHECK-DAG:                       Return [<<ConstN42>>]
+
+  /// CHECK-START: double Main.DoubleNegation() constant_folding (after)
+  /// CHECK-NOT:                       Neg
+
+  public static double DoubleNegation() {
+    double x, y;
+    x = 42D;
+    y = -x;
+    return y;
+  }
+
 
   /**
    * Exercise constant folding on addition.
@@ -166,6 +271,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatAddition() constant_folding (before)
+  /// CHECK-DAG:     <<Const1:f\d+>>  FloatConstant 1
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Add:f\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding (after)
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: float Main.FloatAddition() constant_folding (after)
+  /// CHECK-NOT:                      Add
+
+  public static float FloatAddition() {
+    float a, b, c;
+    a = 1F;
+    b = 2F;
+    c = a + b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (before)
+  /// CHECK-DAG:     <<Const1:d\d+>>  DoubleConstant 1
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Add:d\d+>>     Add [<<Const1>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Add>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (after)
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:                      Return [<<Const3>>]
+
+  /// CHECK-START: double Main.DoubleAddition() constant_folding (after)
+  /// CHECK-NOT:                      Add
+
+  public static double DoubleAddition() {
+    double a, b, c;
+    a = 1D;
+    b = 2D;
+    c = a + b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on subtraction.
@@ -213,6 +360,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (before)
+  /// CHECK-DAG:     <<Const6:f\d+>>  FloatConstant 6
+  /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
+  /// CHECK-DAG:     <<Sub:f\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (after)
+  /// CHECK-DAG:     <<Const4:f\d+>>  FloatConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding (after)
+  /// CHECK-NOT:                      Sub
+
+  public static float FloatSubtraction() {
+    float a, b, c;
+    a = 6F;
+    b = 2F;
+    c = a - b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (before)
+  /// CHECK-DAG:     <<Const6:d\d+>>  DoubleConstant 6
+  /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
+  /// CHECK-DAG:     <<Sub:d\d+>>     Sub [<<Const6>>,<<Const2>>]
+  /// CHECK-DAG:                      Return [<<Sub>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (after)
+  /// CHECK-DAG:     <<Const4:d\d+>>  DoubleConstant 4
+  /// CHECK-DAG:                      Return [<<Const4>>]
+
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding (after)
+  /// CHECK-NOT:                      Sub
+
+  public static double DoubleSubtraction() {
+    double a, b, c;
+    a = 6D;
+    b = 2D;
+    c = a - b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on multiplication.
@@ -260,6 +449,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (before)
+  /// CHECK-DAG:     <<Const7:f\d+>>  FloatConstant 7
+  /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
+  /// CHECK-DAG:     <<Mul:f\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (after)
+  /// CHECK-DAG:     <<Const21:f\d+>> FloatConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding (after)
+  /// CHECK-NOT:                      Mul
+
+  public static float FloatMultiplication() {
+    float a, b, c;
+    a = 7F;
+    b = 3F;
+    c = a * b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (before)
+  /// CHECK-DAG:     <<Const7:d\d+>>  DoubleConstant 7
+  /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
+  /// CHECK-DAG:     <<Mul:d\d+>>     Mul [<<Const7>>,<<Const3>>]
+  /// CHECK-DAG:                      Return [<<Mul>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (after)
+  /// CHECK-DAG:     <<Const21:d\d+>> DoubleConstant 21
+  /// CHECK-DAG:                      Return [<<Const21>>]
+
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding (after)
+  /// CHECK-NOT:                      Mul
+
+  public static double DoubleMultiplication() {
+    double a, b, c;
+    a = 7D;
+    b = 3D;
+    c = a * b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on division.
@@ -311,6 +542,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatDivision() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Div:f\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding (after)
+  /// CHECK-DAG:     <<Const3P2:f\d+>> FloatConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: float Main.FloatDivision() constant_folding (after)
+  /// CHECK-NOT:                       Div
+
+  public static float FloatDivision() {
+    float a, b, c;
+    a = 8F;
+    b = 2.5F;
+    c = a / b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Div:d\d+>>      Div [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Div>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (after)
+  /// CHECK-DAG:     <<Const3P2:d\d+>> DoubleConstant 3.2
+  /// CHECK-DAG:                       Return [<<Const3P2>>]
+
+  /// CHECK-START: double Main.DoubleDivision() constant_folding (after)
+  /// CHECK-NOT:                       Div
+
+  public static double DoubleDivision() {
+    double a, b, c;
+    a = 8D;
+    b = 2.5D;
+    c = a / b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on remainder.
@@ -362,6 +635,48 @@
     return c;
   }
 
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
+  /// CHECK-DAG:     <<Rem:f\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (after)
+  /// CHECK-DAG:     <<Const0P5:f\d+>> FloatConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: float Main.FloatRemainder() constant_folding (after)
+  /// CHECK-NOT:                       Rem
+
+  public static float FloatRemainder() {
+    float a, b, c;
+    a = 8F;
+    b = 2.5F;
+    c = a % b;
+    return c;
+  }
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (before)
+  /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
+  /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
+  /// CHECK-DAG:     <<Rem:d\d+>>      Rem [<<Const8>>,<<Const2P5>>]
+  /// CHECK-DAG:                       Return [<<Rem>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (after)
+  /// CHECK-DAG:     <<Const0P5:d\d+>> DoubleConstant 0.5
+  /// CHECK-DAG:                       Return [<<Const0P5>>]
+
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding (after)
+  /// CHECK-NOT:                       Rem
+
+  public static double DoubleRemainder() {
+    double a, b, c;
+    a = 8D;
+    b = 2.5D;
+    c = a % b;
+    return c;
+  }
+
 
   /**
    * Exercise constant folding on left shift.
@@ -1197,25 +1512,37 @@
   }
 
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws Exception {
     assertIntEquals(-42, IntNegation());
     assertLongEquals(-42L, LongNegation());
+    assertFloatEquals(-42F, FloatNegation());
+    assertDoubleEquals(-42D, DoubleNegation());
 
     assertIntEquals(3, IntAddition1());
     assertIntEquals(14, IntAddition2());
     assertLongEquals(3L, LongAddition());
+    assertFloatEquals(3F, FloatAddition());
+    assertDoubleEquals(3D, DoubleAddition());
 
     assertIntEquals(4, IntSubtraction());
     assertLongEquals(4L, LongSubtraction());
+    assertFloatEquals(4F, FloatSubtraction());
+    assertDoubleEquals(4D, DoubleSubtraction());
 
     assertIntEquals(21, IntMultiplication());
     assertLongEquals(21L, LongMultiplication());
+    assertFloatEquals(21F, FloatMultiplication());
+    assertDoubleEquals(21D, DoubleMultiplication());
 
     assertIntEquals(2, IntDivision());
     assertLongEquals(2L, LongDivision());
+    assertFloatEquals(3.2F, FloatDivision());
+    assertDoubleEquals(3.2D, DoubleDivision());
 
     assertIntEquals(2, IntRemainder());
     assertLongEquals(2L, LongRemainder());
+    assertFloatEquals(0.5F, FloatRemainder());
+    assertDoubleEquals(0.5D, DoubleRemainder());
 
     assertIntEquals(4, ShlIntLong());
     assertLongEquals(12L, ShlLongInt());
@@ -1259,6 +1586,24 @@
     assertFalse(CmpFloatGreaterThanNaN(arbitrary));
     assertFalse(CmpDoubleLessThanNaN(arbitrary));
 
+    Main main = new Main();
+    assertIntEquals(1, main.smaliCmpLongConstants());
+    assertIntEquals(-1, main.smaliCmpGtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstants());
+    assertIntEquals(-1, main.smaliCmpGtDoubleConstants());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstants());
+
+    assertIntEquals(0, main.smaliCmpLongSameConstant());
+    assertIntEquals(0, main.smaliCmpGtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpLtFloatSameConstant());
+    assertIntEquals(0, main.smaliCmpGtDoubleSameConstant());
+    assertIntEquals(0, main.smaliCmpLtDoubleSameConstant());
+
+    assertIntEquals(1, main.smaliCmpGtFloatConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtFloatConstantWithNaN());
+    assertIntEquals(1, main.smaliCmpGtDoubleConstantWithNaN());
+    assertIntEquals(-1, main.smaliCmpLtDoubleConstantWithNaN());
+
     assertIntEquals(33, ReturnInt33());
     assertIntEquals(2147483647, ReturnIntMax());
     assertIntEquals(0, ReturnInt0());
@@ -1275,4 +1620,10 @@
     assertDoubleEquals(34, ReturnDouble34());
     assertDoubleEquals(99.25, ReturnDouble99P25());
   }
+
+  Main() throws ClassNotFoundException {
+    testCmp = Class.forName("TestCmp");
+  }
+
+  private Class<?> testCmp;
 }
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
index 3a1b3fc..e215ab0 100644
--- a/test/566-checker-codegen-select/src/Main.java
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -20,13 +20,6 @@
   /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK-NEXT:                  Select [{{j\d+}},{{j\d+}},<<Cond>>]
 
-  // Condition must be materialized on X86 because it would need too many
-  // registers otherwise.
-  /// CHECK-START-X86: long Main.$noinline$longSelect(long) disassembly (after)
-  /// CHECK:             LessThanOrEqual
-  /// CHECK-NEXT:          cmp
-  /// CHECK:             Select
-
   public long $noinline$longSelect(long param) {
     if (doThrow) { throw new Error(); }
     long val_true = longB;
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 4c58b39..09e97ea 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -41,7 +41,8 @@
         (m_name.compare("$noinline$returnDouble") == 0) ||
         (m_name.compare("$noinline$returnLong") == 0) ||
         (m_name.compare("$noinline$deopt") == 0) ||
-        (m_name.compare("$noinline$inlineCache") == 0)) {
+        (m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       const OatQuickMethodHeader* header =
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
@@ -91,7 +92,8 @@
     ArtMethod* m = GetMethod();
     std::string m_name(m->GetName());
 
-    if (m_name.compare("$noinline$inlineCache") == 0) {
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
       return false;
     }
@@ -119,7 +121,8 @@
     std::string m_name(m->GetName());
 
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if (m_name.compare("$noinline$inlineCache") == 0 && jit != nullptr) {
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
         // Sleep to yield to the compiler thread.
         sleep(0);
diff --git a/test/570-checker-osr/run b/test/570-checker-osr/run
new file mode 100755
index 0000000..24d69b4
--- /dev/null
+++ b/test/570-checker-osr/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure this test is not subject to code collection.
+exec ${RUN} "$@" --runtime-option -Xjitinitialsize:32M
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 828908a..1142d49 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -40,6 +40,9 @@
     if ($noinline$inlineCache(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
       throw new Error("Unexpected return value");
     }
+
+    $noinline$stackOverflow(new Main(), /* isSecondInvocation */ false);
+    $noinline$stackOverflow(new SubMain(), /* isSecondInvocation */ true);
   }
 
   public static int $noinline$returnInt() {
@@ -129,7 +132,32 @@
     return Main.class;
   }
 
-  public static int[] array = new int[4];
+  public void otherInlineCache() {
+    return;
+  }
+
+  public static void $noinline$stackOverflow(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!ensureInInterpreter()) {
+      return;
+    }
+
+    // We need a ProfilingInfo object to populate the 'otherInlineCache' call.
+    ensureHasProfilingInfo();
+
+    if (isSecondInvocation) {
+      // Ensure we have an OSR code and we jump to it.
+      while (!ensureInOsrCode()) {}
+    }
+
+    for (int i = 0; i < (isSecondInvocation ? 10000000 : 1); ++i) {
+      // The first invocation of $noinline$stackOverflow will populate the inline
+      // cache with Main. The second invocation of the method, will see a SubMain
+      // and will therefore trigger deoptimization.
+      m.otherInlineCache();
+    }
+  }
 
   public static native boolean ensureInInterpreter();
   public static native boolean ensureInOsrCode();
@@ -147,4 +175,8 @@
   public Main inlineCache() {
     return new SubMain();
   }
+
+  public void otherInlineCache() {
+    return;
+  }
 }
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 8a4cf60..59741d6 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -29,6 +29,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
     return cond ? x : y;
   }
@@ -46,6 +51,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarCst(boolean cond, int x) {
     return cond ? x : 1;
   }
@@ -63,6 +73,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntCstVar(boolean cond, int y) {
     return cond ? 1 : y;
   }
@@ -80,6 +95,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
     return cond ? x : y;
   }
@@ -97,6 +118,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarCst(boolean cond, long x) {
     return cond ? x : 1L;
   }
@@ -114,6 +141,12 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongCstVar(boolean cond, long y) {
     return cond ? 1L : y;
   }
@@ -168,6 +201,11 @@
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
     return a > b ? x : y;
   }
@@ -189,6 +227,11 @@
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
@@ -208,6 +251,12 @@
   /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ngq
 
+  /// CHECK-START-X86: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+
   public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
     return a > b ? x : y;
   }
@@ -232,6 +281,15 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
     long result = (a > b ? x : y);
     return result + (a > b ? 0L : 1L);
diff --git a/test/575-checker-isnan/expected.txt b/test/575-checker-isnan/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/575-checker-isnan/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/575-checker-isnan/info.txt b/test/575-checker-isnan/info.txt
new file mode 100644
index 0000000..5c48a6a
--- /dev/null
+++ b/test/575-checker-isnan/info.txt
@@ -0,0 +1 @@
+Unit test for float/double isNaN() operation.
diff --git a/test/575-checker-isnan/src/Main.java b/test/575-checker-isnan/src/Main.java
new file mode 100644
index 0000000..cc71e5e
--- /dev/null
+++ b/test/575-checker-isnan/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN32(float) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN32(float x) {
+    return Float.isNaN(x);
+  }
+
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (before)
+  /// CHECK-DAG: <<Result:z\d+>> InvokeStaticOrDirect
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-DAG: <<Result:z\d+>> NotEqual
+  /// CHECK-DAG: Return [<<Result>>]
+  //
+  /// CHECK-START: boolean Main.isNaN64(double) instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect
+  private static boolean isNaN64(double x) {
+    return Double.isNaN(x);
+  }
+
+  public static void main(String args[]) {
+    // A few distinct numbers.
+    expectFalse(isNaN32(Float.NEGATIVE_INFINITY));
+    expectFalse(isNaN32(-1.0f));
+    expectFalse(isNaN32(-0.0f));
+    expectFalse(isNaN32(0.0f));
+    expectFalse(isNaN32(1.0f));
+    expectFalse(isNaN32(Float.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80400000)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x00000001)));
+    expectFalse(isNaN32(Float.intBitsToFloat(0x80000001)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN32(Float.NaN));
+    expectTrue(isNaN32(0.0f / 0.0f));
+    expectTrue(isNaN32((float)Math.sqrt(-1.0f)));
+    float[] fvals = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectTrue(isNaN32(fvals[i]));
+    }
+
+    // A few distinct numbers.
+    expectFalse(isNaN64(Double.NEGATIVE_INFINITY));
+    expectFalse(isNaN32(-1.0f));
+    expectFalse(isNaN64(-0.0d));
+    expectFalse(isNaN64(0.0d));
+    expectFalse(isNaN64(1.0d));
+    expectFalse(isNaN64(Double.POSITIVE_INFINITY));
+
+    // A few distinct subnormal numbers.
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0008000000000000l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8008000000000000l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x0000000000000001l)));
+    expectFalse(isNaN64(Double.longBitsToDouble(0x8000000000000001l)));
+
+    // A few NaN numbers.
+    expectTrue(isNaN64(Double.NaN));
+    expectTrue(isNaN64(0.0d / 0.0d));
+    expectTrue(isNaN64(Math.sqrt(-1.0d)));
+    double[] dvals = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectTrue(isNaN64(dvals[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectTrue(boolean value) {
+    if (!value) {
+      throw new Error("Expected True");
+    }
+  }
+
+  private static void expectFalse(boolean value) {
+    if (value) {
+      throw new Error("Expected False");
+    }
+  }
+}
diff --git a/test/575-checker-string-init-alias/expected.txt b/test/575-checker-string-init-alias/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/575-checker-string-init-alias/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/575-checker-string-init-alias/info.txt b/test/575-checker-string-init-alias/info.txt
new file mode 100644
index 0000000..a91ea64
--- /dev/null
+++ b/test/575-checker-string-init-alias/info.txt
@@ -0,0 +1,2 @@
+Test for the String.<init> change and deoptimization: make
+sure the compiler knows how to handle dex aliases.
diff --git a/test/575-checker-string-init-alias/smali/TestCase.smali b/test/575-checker-string-init-alias/smali/TestCase.smali
new file mode 100644
index 0000000..ff04b27
--- /dev/null
+++ b/test/575-checker-string-init-alias/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.field public static staticField:Ljava/lang/String;
+
+## CHECK-START: void TestCase.testNoAlias(int[], java.lang.String) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testNoAlias([ILjava/lang/String;)V
+    .registers 6
+    const v1, 0
+    const v2, 1
+    new-instance v0, Ljava/lang/String;
+
+    # Will deoptimize.
+    aget v3, p0, v1
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v0, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v0, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v3, p0, v2
+
+    return-void
+.end method
+
+## CHECK-START: void TestCase.testAlias(int[], java.lang.String) register (after)
+## CHECK:         <<New:l\d+>>    NewInstance
+## CHECK:                         Deoptimize env:[[<<New>>,<<New>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+.method public static testAlias([ILjava/lang/String;)V
+    .registers 7
+    const v2, 0
+    const v3, 1
+    new-instance v0, Ljava/lang/String;
+    move-object v1, v0
+
+    # Will deoptimize.
+    aget v4, p0, v2
+
+    # Check that we're being executed by the interpreter.
+    invoke-static {}, LMain;->assertIsInterpreted()V
+
+    invoke-direct {v1, p1}, Ljava/lang/String;-><init>(Ljava/lang/String;)V
+
+    sput-object v1, LTestCase;->staticField:Ljava/lang/String;
+
+    # Will throw AIOOBE.
+    aget v4, p0, v3
+
+    return-void
+.end method
diff --git a/test/575-checker-string-init-alias/src/Main.java b/test/575-checker-string-init-alias/src/Main.java
new file mode 100644
index 0000000..1ab3207
--- /dev/null
+++ b/test/575-checker-string-init-alias/src/Main.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();
+
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    int[] array = new int[1];
+
+    {
+      Method m = c.getMethod("testNoAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array , "foo" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("foo", (String)field.get(null));
+    }
+
+    {
+      Method m = c.getMethod("testAlias", int[].class, String.class);
+      try {
+        m.invoke(null, new Object[] { array, "bar" });
+        throw new Error("Expected AIOOBE");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw new Error("Expected AIOOBE");
+        }
+        // Ignore
+      }
+      Field field = c.getField("staticField");
+      assertEqual("bar", (String)field.get(null));
+    }
+  }
+}
diff --git a/test/576-polymorphic-inlining/expected.txt b/test/576-polymorphic-inlining/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/576-polymorphic-inlining/expected.txt
diff --git a/test/576-polymorphic-inlining/info.txt b/test/576-polymorphic-inlining/info.txt
new file mode 100644
index 0000000..b3ef0c8
--- /dev/null
+++ b/test/576-polymorphic-inlining/info.txt
@@ -0,0 +1 @@
+Test for polymorphic inlining.
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
new file mode 100644
index 0000000..d8d09af
--- /dev/null
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    for (int i = 0; i < 20000; ++i) {
+      $noinline$testVoid(new Main());
+      $noinline$testVoid(new SubMain());
+      $noinline$testVoid(new SubSubMain());
+
+      $noinline$testWithReturnValue(new Main());
+      $noinline$testWithReturnValue(new SubMain());
+      $noinline$testWithReturnValue(new SubSubMain());
+
+      $noinline$testWithBackEdge(new Main());
+      $noinline$testWithBackEdge(new SubMain());
+      $noinline$testWithBackEdge(new SubSubMain());
+    }
+  }
+
+  public static void assertIdentical(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void $noinline$testVoid(Main m) {
+    if (doThrow) throw new Error("");
+    m.willInlineVoid();
+    m.willOnlyInlineForMainVoid();
+  }
+
+  public static void $noinline$testWithReturnValue(Main m) {
+    if (doThrow) throw new Error("");
+    assertIdentical(m.getClass(), m.willInlineWithReturnValue());
+    assertIdentical(m.getClass(), m.willOnlyInlineForMainWithReturnValue());
+  }
+
+  public static void $noinline$testWithBackEdge(Main m) {
+    if (doThrow) throw new Error("");
+    for (int i = 0; i < 10; ++i) {
+      m.willInlineVoid();
+    }
+    for (int i = 0; i < 10; ++i) {
+      m.willOnlyInlineForMainVoid();
+    }
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public void willOnlyInlineForMainVoid() {
+  }
+
+  public Class willInlineWithReturnValue() {
+    return Main.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return Main.class;
+  }
+  public static boolean doThrow;
+}
+
+class SubMain extends Main {
+  public void willOnlyInlineForMainVoid() {
+    if (doThrow) throw new Error("");
+  }
+
+  public void willInlineVoid() {
+  }
+
+  public Class willInlineWithReturnValue() {
+    return SubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return SubMain.class;
+  }
+}
+
+class SubSubMain extends SubMain {
+  public Class willInlineWithReturnValue() {
+    return SubSubMain.class;
+  }
+
+  public Class willOnlyInlineForMainWithReturnValue() {
+    return SubSubMain.class;
+  }
+}
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 44206df..e6394a9 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -272,5 +272,10 @@
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
           "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
+},
+{
+  description: "Only work with --mode=activity",
+  result: EXEC_FAILED,
+  names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index d8ef9ba..f347429 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -29,6 +29,7 @@
   modes: [host],
   names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+          "libcore.java.util.zip.GZIPInputStreamTest#testLongMessage",
           "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
           "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index f29e51f..e4af9fa 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -20,9 +20,9 @@
 fi
 
 # Jar containing all the tests.
-test_jar=${OUT_DIR-out}/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
+test_jack=${OUT_DIR-out}/host/common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
 
-if [ ! -f $test_jar ]; then
+if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
        "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
   exit 1
@@ -117,6 +117,9 @@
   art_debugee="$art_debugee -verbose:jdwp"
 fi
 
+# Use Jack with "1.8" configuration.
+export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
+
 # Run the tests using vogar.
 vogar $vm_command \
       $vm_args \
@@ -129,7 +132,8 @@
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
       --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
-      --classpath $test_jar \
+      --classpath $test_jack \
+      --toolchain jack --language JN \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test