Merge "Introduce more compact ReadBarrierMark slow-paths."
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index a4a603a..17ea4da 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -56,7 +56,7 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 123bcaa..bd13d16 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -34,7 +34,7 @@
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
 
-# Set this to change what opt level Art is built at.
+# Set this to change what opt level ART is built at.
 ART_DEBUG_OPT_FLAG ?= -O2
 ART_NDEBUG_OPT_FLAG ?= -O3
 
@@ -336,6 +336,12 @@
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
   -UNDEBUG
 
+# Assembler flags for non-debug ART and ART tools.
+art_non_debug_asflags :=
+
+# Assembler flags for debug ART and ART tools.
+art_debug_asflags := -UNDEBUG
+
 art_host_non_debug_cflags := $(art_non_debug_cflags)
 art_target_non_debug_cflags := $(art_non_debug_cflags)
 
@@ -386,6 +392,11 @@
 ART_HOST_DEBUG_CFLAGS := $(art_debug_cflags)
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+ART_HOST_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_TARGET_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_HOST_DEBUG_ASFLAGS := $(art_debug_asflags)
+ART_TARGET_DEBUG_ASFLAGS := $(art_debug_asflags)
+
 ifndef LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA
   LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA=-0x1000000
 endif
@@ -414,6 +425,8 @@
 art_target_cflags :=
 art_debug_cflags :=
 art_non_debug_cflags :=
+art_debug_asflags :=
+art_non_debug_asflags :=
 art_host_non_debug_cflags :=
 art_target_non_debug_cflags :=
 art_default_gc_type_cflags :=
@@ -435,8 +448,10 @@
   art_target_cflags_ndebug_or_debug := $(1)
   ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
     LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_DEBUG_ASFLAGS)
   else
     LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_NON_DEBUG_ASFLAGS)
   endif
 
   LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS)
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index cb6d340..157500b 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -90,8 +90,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     ifeq ($$(art_static_or_shared),static)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 009933d..74c3033 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -94,17 +94,17 @@
 
 ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
-  $(HOST_CORE_IMAGE_default_no-pic_32)
+  $(HOST_CORE_IMAGE_default_no-pic_32) \
+  $(HOST_OUT_EXECUTABLES)/patchoatd
 ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
-  $(TARGET_CORE_IMAGE_default_no-pic_32)
+  $(TARGET_CORE_IMAGE_default_no-pic_32) \
+  $(TARGET_OUT_EXECUTABLES)/patchoatd
 
 ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS) \
-   $(HOST_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS) \
-   $(TARGET_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
 
 
 ART_GTEST_dex2oat_test_HOST_DEPS := \
@@ -679,7 +679,7 @@
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4ec7d72..2666835 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -75,6 +75,7 @@
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_phi_elimination.cc \
 	optimizing/stack_map_stream.cc \
+	optimizing/x86_memory_gen.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
@@ -262,8 +263,10 @@
     endif
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 474530a..4c0095d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2521,11 +2521,28 @@
                                                               true);
     }
     // Create the conflict tables.
-    if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) {
-      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
-    }
+    FillIMTAndConflictTables(klass);
     return true;
   }
+
+ private:
+  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!klass->ShouldHaveImt()) {
+      return;
+    }
+    if (visited_classes_.find(klass) != visited_classes_.end()) {
+      return;
+    }
+    if (klass->HasSuperClass()) {
+      FillIMTAndConflictTables(klass->GetSuperClass());
+    }
+    if (!klass->IsTemp()) {
+      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
+    }
+    visited_classes_.insert(klass);
+  }
+
+  std::set<mirror::Class*> visited_classes_;
 };
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index da10568..063eb11 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1232,9 +1232,10 @@
       }
       // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
       // live.
-      if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
-        for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-          ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
+      if (as_klass->ShouldHaveImt()) {
+        ImTable* imt = as_klass->GetImt(target_ptr_size_);
+        for (size_t i = 0; i < ImTable::kSize; ++i) {
+          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
           DCHECK(imt_method != nullptr);
           if (imt_method->IsRuntimeMethod() &&
               !IsInBootImage(imt_method) &&
@@ -1243,6 +1244,11 @@
           }
         }
       }
+
+      if (as_klass->ShouldHaveImt()) {
+        ImTable* imt = as_klass->GetImt(target_ptr_size_);
+        TryAssignImTableOffset(imt, oat_index);
+      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
       int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
@@ -1269,6 +1275,23 @@
   return native_object_relocations_.find(ptr) != native_object_relocations_.end();
 }
 
+void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
+  // No offset, or already assigned.
+  if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
+    return;
+  }
+  // If the method is a conflict method we also want to assign the conflict table offset.
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const size_t size = ImTable::SizeInBytes(target_ptr_size_);
+  native_object_relocations_.emplace(
+      imt,
+      NativeObjectRelocation {
+          oat_index,
+          image_info.bin_slot_sizes_[kBinImTable],
+          kNativeObjectRelocationTypeIMTable});
+  image_info.bin_slot_sizes_[kBinImTable] += size;
+}
+
 void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
   // No offset, or already assigned.
   if (table == nullptr || NativeRelocationAssigned(table)) {
@@ -1391,6 +1414,7 @@
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
+        case kBinImTable:
         case kBinIMTConflictTable: {
           bin_offset = RoundUp(bin_offset, target_ptr_size_);
           break;
@@ -1461,6 +1485,10 @@
       bin_slot_offsets_[kBinArtMethodClean],
       bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
 
+  // IMT section.
+  ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
+  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
+
   // Conflict tables section.
   ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
   *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
@@ -1585,6 +1613,13 @@
   ImageWriter* const image_writer_;
 };
 
+void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
+  for (size_t i = 0; i < ImTable::kSize; ++i) {
+    ArtMethod* method = orig->Get(i, target_ptr_size_);
+    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
+  }
+}
+
 void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
   const size_t count = orig->NumEntries(target_ptr_size_);
   for (size_t i = 0; i < count; ++i) {
@@ -1642,6 +1677,12 @@
       case kNativeObjectRelocationTypeDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
+      case kNativeObjectRelocationTypeIMTable: {
+        ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
+        ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
+        CopyAndFixupImTable(orig_imt, dest_imt);
+        break;
+      }
       case kNativeObjectRelocationTypeIMTConflictTable: {
         auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
         CopyAndFixupImtConflictTable(
@@ -1850,13 +1891,25 @@
 }
 
 template <typename T>
+std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::ostringstream oss;
+  oss << ptr;
+  return oss.str();
+}
+
+template <>
+std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return PrettyMethod(method);
+}
+
+template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
     auto it = native_object_relocations_.find(obj);
-    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
-        << Runtime::Current()->GetHeap()->DumpSpaces();
+    CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj)
+        << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces();
     const NativeObjectRelocation& relocation = it->second;
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
     return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
@@ -2210,6 +2263,8 @@
       return kBinDexCacheArray;
     case kNativeObjectRelocationTypeRuntimeMethod:
       return kBinRuntimeMethod;
+    case kNativeObjectRelocationTypeIMTable:
+      return kBinImTable;
     case kNativeObjectRelocationTypeIMTConflictTable:
       return kBinIMTConflictTable;
   }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 51976c5..1efdc22 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -169,6 +169,8 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
+    // IMT (clean)
+    kBinImTable,
     // Conflict tables (clean).
     kBinIMTConflictTable,
     // Runtime methods (always clean, do not have a length prefix array).
@@ -191,6 +193,7 @@
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
     kNativeObjectRelocationTypeRuntimeMethod,
+    kNativeObjectRelocationTypeIMTable,
     kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
@@ -401,6 +404,7 @@
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
@@ -433,6 +437,8 @@
                           size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
   // relocation.
   void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4520f9b..d40e2b9 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -314,7 +314,8 @@
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
-  LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnMainOnly);
 
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
@@ -378,7 +379,7 @@
 
   ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
   LocationSummary* locations =
-      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+      new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
 
   locations->AddTemp(calling_convention.GetFieldIndexLocation());
 
@@ -499,7 +500,7 @@
                                                    bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
           cls->CanCallRuntime())
             ? LocationSummary::kCallOnSlowPath
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e9bc3b3..1aa7b54 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1909,8 +1909,6 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -1936,10 +1934,14 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  __ LoadFromOffset(kLoadWord, temp, temp,
+        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
-  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
@@ -2032,7 +2034,7 @@
       (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
         && result_type == Primitive::kPrimLong)
        || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -2310,8 +2312,7 @@
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
           SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          __ vmovs(temp, in.AsFpuRegister<SRegister>());
-          __ vcvtis(temp, temp);
+          __ vcvtis(temp, in.AsFpuRegister<SRegister>());
           __ vmovrs(out.AsRegister<Register>(), temp);
           break;
         }
@@ -2319,9 +2320,7 @@
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
           SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp_d = FromLowSToD(temp_s);
-          __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
-          __ vcvtid(temp_s, temp_d);
+          __ vcvtid(temp_s, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
           __ vmovrs(out.AsRegister<Register>(), temp_s);
           break;
         }
@@ -2854,13 +2853,13 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2979,7 +2978,7 @@
   Primitive::Type type = rem->GetResultType();
 
   // Most remainders are implemented in the runtime.
-  LocationSummary::CallKind call_kind = LocationSummary::kCall;
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
   if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
     call_kind = LocationSummary::kNoCall;
@@ -3516,7 +3515,7 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   } else {
@@ -3549,7 +3548,7 @@
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(R0));
@@ -5470,7 +5469,7 @@
 
 void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5871,7 +5870,7 @@
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6298,21 +6297,12 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
-  __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including the rb_state,
+  // which shall prevent load-load reordering without using
   // a memory barrier (which would be more expensive).
-  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // IP, which depends on temp_reg.
-  __ add(obj, obj, ShifterOperand(IP));
+  // obj is unchanged by this operation, but its value now depends on temp_reg.
+  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
@@ -6349,8 +6339,14 @@
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS
+  // which can be a 16-bit instruction unlike the TST immediate.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -6974,18 +6970,25 @@
 
 void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index e480f12..d9d675e 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3525,8 +3525,6 @@
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   LocationSummary* locations = invoke->GetLocations();
   Register temp = XRegisterFrom(locations->GetTemp(0));
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
@@ -3556,6 +3554,10 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+  __ Ldr(temp,
+      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -4290,7 +4292,7 @@
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -4388,7 +4390,7 @@
 
 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetOut(LocationFrom(x0));
@@ -4413,7 +4415,7 @@
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(LocationFrom(kArtMethodRegister));
@@ -4566,7 +4568,8 @@
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4783,7 +4786,7 @@
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -5223,23 +5226,12 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
-  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  Register temp2 = temps.AcquireW();
-  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // temp2, which depends on temp.
-  __ Add(obj, obj, Operand(temp2));
-  temps.Release(temp2);
+  // obj is unchanged by this operation, but its value now depends on temp.
+  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
@@ -5265,7 +5257,7 @@
         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
         Load(type, ref_reg, HeapOperand(obj, computed_offset));
       } else {
-        temp2 = temps.AcquireW();
+        Register temp2 = temps.AcquireW();
         __ Add(temp2, obj, offset);
         Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
         temps.Release(temp2);
@@ -5291,8 +5283,11 @@
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ Cmp(temp, ReadBarrier::gray_ptr_);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -5367,16 +5362,19 @@
 
 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
+        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->Out()), method_offset));
   }
-  __ Ldr(XRegisterFrom(locations->Out()),
-         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
 }
 
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 37f1c35..2b71da0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1855,7 +1855,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2467,7 +2467,7 @@
 void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
   Primitive::Type type = div->GetResultType();
   LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3430,7 +3430,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3557,7 +3557,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3772,8 +3772,6 @@
 void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
@@ -3790,6 +3788,10 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadWord, temp, temp,
+      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -4216,7 +4218,7 @@
 
 void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4395,7 +4397,7 @@
 
 void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -4421,7 +4423,7 @@
 
 void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4591,7 +4593,7 @@
 void LocationsBuilderMIPS::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4828,7 +4830,7 @@
 
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4857,7 +4859,7 @@
   if (!isR6 &&
       ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
        (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -5378,18 +5380,25 @@
 
 void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 2e78884..aa1ba84 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1436,7 +1436,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2932,8 +2932,6 @@
 void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
@@ -2950,6 +2948,10 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadDoubleword, temp, temp,
+      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3290,7 +3292,7 @@
 
 void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3417,7 +3419,7 @@
 
 void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -3438,7 +3440,7 @@
 
 void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3598,7 +3600,8 @@
 void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -3811,7 +3814,7 @@
 
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b33cabb..1cc6060 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -140,12 +140,29 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<Register>(), array_len);
+    }
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -2045,8 +2062,6 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -2073,7 +2088,12 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movl(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
@@ -2200,7 +2220,7 @@
   LocationSummary::CallKind call_kind =
       ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
        && result_type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3455,7 +3475,7 @@
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
 
@@ -3558,7 +3578,7 @@
   Primitive::Type type = rem->GetResultType();
 
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
@@ -4000,7 +4020,7 @@
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4033,7 +4053,7 @@
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -4088,16 +4108,21 @@
 
 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(), method_offset));
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(),
+                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+    // temp = temp->GetImtEntryAt(method_offset);
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->Out().AsRegister<Register>(), method_offset));
   }
-  __ movl(locations->Out().AsRegister<Register>(),
-          Address(locations->InAt(0).AsRegister<Register>(), method_offset));
 }
 
 void LocationsBuilderX86::VisitNot(HNot* not_) {
@@ -5528,10 +5553,16 @@
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
@@ -5546,7 +5577,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5580,12 +5614,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    Register length = length_loc.AsRegister<Register>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<Register>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<Register>());
+      Register length = length_loc.AsRegister<Register>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<Register>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -6253,7 +6303,7 @@
 
 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6705,7 +6755,7 @@
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a524057..a015893 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -194,14 +194,31 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+    }
+
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
-    InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -2275,8 +2292,6 @@
   LocationSummary* locations = invoke->GetLocations();
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
@@ -2302,6 +2317,12 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movq(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+  // temp = temp->GetImtEntryAt(method_offset);
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -3927,7 +3948,7 @@
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3960,7 +3981,7 @@
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -4020,16 +4041,20 @@
 
 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(),
+            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   }
-  __ movq(locations->Out().AsRegister<CpuRegister>(),
-          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
 }
 
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
@@ -4998,10 +5023,16 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
@@ -5016,7 +5047,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5026,8 +5060,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -5050,12 +5083,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    CpuRegister length = length_loc.AsRegister<CpuRegister>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      CpuRegister length = length_loc.AsRegister<CpuRegister>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -5665,7 +5714,7 @@
 
 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6175,7 +6224,7 @@
 
 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9d67373..e14f603 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -401,6 +401,9 @@
   void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
     StartAttributeStream("is_string_length") << std::boolalpha
         << array_length->IsStringLength() << std::noboolalpha;
+    if (array_length->IsEmittedAtUseSite()) {
+      StartAttributeStream("emitted_at_use") << "true";
+    }
   }
 
   void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f9e78b0..6c1292c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -656,8 +656,8 @@
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry(
-          method_index % mirror::Class::kImtSize, pointer_size);
+      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
+          method_index % ImTable::kSize, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
         // interface table.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e0410dc..4ca0600 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -920,6 +920,7 @@
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
+  bool integral_type = Primitive::IsIntegralType(instruction->GetType());
   if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    ADD dst, src, 0
@@ -928,7 +929,7 @@
     // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When
     // `x` is `-0.0`, the former expression yields `0.0`, while the later
     // yields `-0.0`.
-    if (Primitive::IsIntegralType(instruction->GetType())) {
+    if (integral_type) {
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
@@ -974,10 +975,31 @@
   // so no need to return.
   TryHandleAssociativeAndCommutativeOperation(instruction);
 
-  if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) &&
+  if ((left->IsSub() || right->IsSub()) &&
       TrySubtractionChainSimplification(instruction)) {
     return;
   }
+
+  if (integral_type) {
+    // Replace code patterns looking like
+    //    SUB dst1, x, y        SUB dst1, x, y
+    //    ADD dst2, dst1, y     ADD dst2, y, dst1
+    // with
+    //    SUB dst1, x, y
+    // ADD instruction is not needed in this case, we may use
+    // one of inputs of SUB instead.
+    if (left->IsSub() && left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (right->IsSub() && right->InputAt(1) == left) {
+      instruction->ReplaceWith(right->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -1511,6 +1533,29 @@
   if (TrySubtractionChainSimplification(instruction)) {
     return;
   }
+
+  if (left->IsAdd()) {
+    // Replace code patterns looking like
+    //    ADD dst1, x, y        ADD dst1, x, y
+    //    SUB dst2, dst1, y     SUB dst2, dst1, x
+    // with
+    //    ADD dst1, x, y
+    // SUB instruction is not needed in this case, we may use
+    // one of inputs of ADD instead.
+    // It is applicable to integral types only.
+    DCHECK(Primitive::IsIntegralType(type));
+    if (left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (left->InputAt(0) == right) {
+      instruction->ReplaceWith(left->InputAt(1));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+  }
 }
 
 void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 579fb9d..bbdcee4 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1212,7 +1212,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1232,7 +1232,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1250,7 +1250,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1280,7 +1280,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1307,7 +1307,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1665,7 +1665,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
@@ -1692,7 +1692,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 30fa650..16438a7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -608,54 +608,66 @@
   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
-static void CreateFPToIntPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
   locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
-static void GenMathRound(LocationSummary* locations,
-                         bool is_double,
-                         vixl::MacroAssembler* masm) {
-  FPRegister in_reg = is_double ?
-      DRegisterFrom(locations->InAt(0)) : SRegisterFrom(locations->InAt(0));
-  Register out_reg = is_double ?
-      XRegisterFrom(locations->Out()) : WRegisterFrom(locations->Out());
-  UseScratchRegisterScope temps(masm);
-  FPRegister temp1_reg = temps.AcquireSameSizeAs(in_reg);
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler* masm) {
+  // Java 8 API definition for Math.round():
+  // Return the closest long or int to the argument, with ties rounding to positive infinity.
+  //
+  // There is no single instruction in ARMv8 that can support the above definition.
+  // We choose to use FCVTAS here, because it has closest semantic.
+  // FCVTAS performs rounding to nearest integer, ties away from zero.
+  // For most inputs (positive values, zero or NaN), this instruction is enough.
+  // We only need a few handling code after FCVTAS if the input is negative half value.
+  //
+  // The reason why we didn't choose FCVTPS instruction here is that
+  // although it performs rounding toward positive infinity, it doesn't perform rounding to nearest.
+  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
+  // If we were using this instruction, for most inputs, more handling code would be needed.
+  LocationSummary* l = invoke->GetLocations();
+  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
+  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
+  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
+  vixl::Label done;
 
-  // 0.5 can be encoded as an immediate, so use fmov.
-  if (is_double) {
-    __ Fmov(temp1_reg, static_cast<double>(0.5));
-  } else {
-    __ Fmov(temp1_reg, static_cast<float>(0.5));
-  }
-  __ Fadd(temp1_reg, in_reg, temp1_reg);
-  __ Fcvtms(out_reg, temp1_reg);
+  // Round to nearest integer, ties away from zero.
+  __ Fcvtas(out_reg, in_reg);
+
+  // For positive values, zero or NaN inputs, rounding is done.
+  __ Tbz(out_reg, out_reg.size() - 1, &done);
+
+  // Handle input < 0 cases.
+  // If input is negative but not a tie, previous result (round to nearest) is valid.
+  // If input is a negative tie, out_reg += 1.
+  __ Frinta(tmp_fp, in_reg);
+  __ Fsub(tmp_fp, in_reg, tmp_fp);
+  __ Fcmp(tmp_fp, 0.5);
+  __ Cinc(out_reg, out_reg, eq);
+
+  __ Bind(&done);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -1393,7 +1405,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1413,7 +1425,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1431,7 +1443,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1461,7 +1473,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1488,7 +1500,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1519,7 +1531,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
@@ -1534,7 +1546,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index d4f44d6..0bfa025 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1875,7 +1875,7 @@
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2071,7 +2071,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2096,7 +2096,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2122,7 +2122,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2155,7 +2155,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2187,7 +2187,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 9243f4c..a9807bd 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -876,6 +876,151 @@
   GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
 }
 
+static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+  Mips64Label done;
+  Mips64Label finite;
+  Mips64Label add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor(in);
+  //
+  // /*
+  //  * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-/64-bit
+  //  * signed integer will be processed by floor.X.Y to output
+  //  * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
+  //  * processed by this "if" statement.
+  //  *
+  //  * However, this bug in the 64-bit MIPS emulator causes the
+  //  * behavior of floor.X.Y to be the same as pre-R6 implementations
+  //  * of MIPS64.  When that bug is fixed this logic should be amended.
+  //  */
+  // if (out == MAX_VALUE) {
+  //   TMP = (in < 0.0) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * MAX_VALUE to MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (type == Primitive::kPrimDouble) {
+    __ CmpUnD(FTMP, in, in);
+  } else {
+    __ CmpUnS(FTMP, in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  __ Bc1nez(FTMP, &done);
+
+  // out = floor(in);
+  if (type == Primitive::kPrimDouble) {
+    __ FloorLD(FTMP, in);
+    __ Dmfc1(out, FTMP);
+  } else {
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
+  }
+
+  // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+  } else {
+    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  }
+  __ Bnec(AT, out, &finite);
+
+  if (type == Primitive::kPrimDouble) {
+    __ Dmtc1(ZERO, FTMP);
+    __ CmpLtD(FTMP, in, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Mtc1(ZERO, FTMP);
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bc(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5 <= (in - out)) ? -1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ Cvtdl(FTMP, FTMP);  // Convert output of floor.l.d back to "double".
+    __ LoadConst64(AT, bit_cast<int64_t, double>(0.5));
+    __ SubD(FTMP, in, FTMP);
+    __ Dmtc1(AT, half);
+    __ CmpLeD(FTMP, half, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bind(&add);
+
+  // Return out -= TMP.
+  if (type == Primitive::kPrimDouble) {
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Subu(out, out, AT);
+  }
+
+  __ Bind(&done);
+}
+
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat);
+}
+
+// long java.lang.Math.round(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble);
+}
+
 // byte libcore.io.Memory.peekByte(long address)
 void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
@@ -1374,7 +1519,7 @@
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1562,7 +1707,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1583,7 +1728,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1603,7 +1748,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1638,7 +1783,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1671,7 +1816,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1734,9 +1879,6 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)
-
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 812bdf5..6c81421 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -706,7 +706,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -774,7 +774,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(EAX));
@@ -831,7 +831,7 @@
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -985,7 +985,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1216,7 +1216,7 @@
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1490,7 +1490,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1518,7 +1518,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1543,7 +1543,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 891aaf5..28f1f4f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -526,7 +526,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -588,7 +588,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -699,7 +699,7 @@
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -839,7 +839,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1303,7 +1303,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1577,7 +1577,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1606,7 +1606,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1632,7 +1632,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3f27c91..7a78bfd 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -481,7 +481,7 @@
   enum CallKind {
     kNoCall,
     kCallOnSlowPath,
-    kCall
+    kCallOnMainOnly
   };
 
   LocationSummary(HInstruction* instruction,
@@ -541,7 +541,7 @@
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
-  bool WillCall() const { return call_kind_ == kCall; }
+  bool WillCall() const { return call_kind_ == kCallOnMainOnly; }
   bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
   bool NeedsSafepoint() const { return CanCall(); }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d703b0f..d6e09d7 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -37,6 +37,10 @@
 #include "pc_relative_fixups_x86.h"
 #endif
 
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#include "x86_memory_gen.h"
+#endif
+
 #include "art_method-inl.h"
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
@@ -485,13 +489,27 @@
     case kX86: {
       x86::PcRelativeFixups* pc_relative_fixups =
           new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
       HOptimization* x86_optimizations[] = {
-          pc_relative_fixups
+          pc_relative_fixups,
+          memory_gen
       };
       RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64: {
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+      HOptimization* x86_64_optimizations[] = {
+          memory_gen
+      };
+      RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer);
+      break;
+    }
+#endif
     default:
       break;
   }
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9d99668..1b33408 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1346,9 +1346,15 @@
   // Find an available spill slot.
   size_t slot = 0;
   for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()
-        && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
-      break;
+    if ((*spill_slots)[slot] <= parent->GetStart()) {
+      if (!parent->NeedsTwoSpillSlots()) {
+        // One spill slot is sufficient.
+        break;
+      }
+      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+        // Two spill slots are available.
+        break;
+      }
     }
   }
 
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
new file mode 100644
index 0000000..195159f
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "x86_memory_gen.h"
+#include "code_generator.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Replace instructions with memory operand forms.
+ */
+class MemoryOperandVisitor : public HGraphVisitor {
+ public:
+  MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
+      : HGraphVisitor(graph),
+        do_implicit_null_checks_(do_implicit_null_checks) {}
+
+ private:
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+    // Replace the length by the array itself, so that we can do compares to memory.
+    HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+
+    // We only want to replace an ArrayLength.
+    if (array_len == nullptr) {
+      return;
+    }
+
+    HInstruction* array = array_len->InputAt(0);
+    DCHECK_EQ(array->GetType(), Primitive::kPrimNot);
+
+    // Don't apply this optimization when the array is nullptr.
+    if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) {
+      return;
+    }
+
+    // Is there a null check that could be an implicit check?
+    if (array->IsNullCheck() && do_implicit_null_checks_) {
+      // The ArrayLen may generate the implicit null check.  Can the
+      // bounds check do so as well?
+      if (array_len->GetNextDisregardingMoves() != check) {
+        // No, it won't.  Leave as is.
+        return;
+      }
+    }
+
+    // Can we suppress the ArrayLength and generate at BoundCheck?
+    if (array_len->HasOnlyOneNonEnvironmentUse()) {
+      array_len->MarkEmittedAtUseSite();
+      // We need the ArrayLength just before the BoundsCheck.
+      array_len->MoveBefore(check);
+    }
+  }
+
+  bool do_implicit_null_checks_;
+};
+
+X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
+                                                       OptimizingCompilerStats* stats,
+                                                       CodeGenerator* codegen)
+    : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
+      do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
+}
+
+void X86MemoryOperandGeneration::Run() {
+  MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
new file mode 100644
index 0000000..7e88681
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+class CodeGenerator;
+
+namespace x86 {
+
+class X86MemoryOperandGeneration : public HOptimization {
+ public:
+  X86MemoryOperandGeneration(HGraph* graph,
+                             OptimizingCompilerStats* stats,
+                             CodeGenerator* codegen);
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kX86MemoryOperandGenerationPassName =
+          "x86_memory_operand_generation";
+
+ private:
+  bool do_implicit_null_checks_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c133980..8d20e5b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -2038,7 +2038,7 @@
           location.c_str(), location.c_str(), kVerifyChecksum, &error_msg, opened_dex_files)) {
         // If we fail to open the dex file because it's been stripped, try to open the dex file
         // from its corresponding oat file.
-        OatFileAssistant oat_file_assistant(location.c_str(), isa, false, false);
+        OatFileAssistant oat_file_assistant(location.c_str(), isa, false);
         std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
         if (oat_file == nullptr) {
           LOG(WARNING) << "Failed to open dex file and associated oat file for '" << location
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 48b773e..96c3267 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -118,7 +118,7 @@
  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
  * form can't be converted back to a descriptor.
  */
-static char* descriptorToDot(const char* str) {
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
   int targetLen = strlen(str);
   int offset = 0;
 
@@ -145,8 +145,7 @@
   }
 
   // Copy class name over.
-  char* newStr = reinterpret_cast<char*>(
-      malloc(targetLen + arrayDepth * 2 + 1));
+  std::unique_ptr<char[]> newStr(new char[targetLen + arrayDepth * 2 + 1]);
   int i = 0;
   for (; i < targetLen; i++) {
     const char ch = str[offset + i];
@@ -165,12 +164,10 @@
 
 /*
  * Converts the class name portion of a type descriptor to human-readable
- * "dotted" form.
- *
- * Returns a newly-allocated string.
+ * "dotted" form. For example, "Ljava/lang/String;" becomes "String".
  */
-static char* descriptorClassToDot(const char* str) {
-  // Reduce to just the class name, trimming trailing ';'.
+static std::unique_ptr<char[]> descriptorClassToDot(const char* str) {
+  // Reduce to just the class name prefix.
   const char* lastSlash = strrchr(str, '/');
   if (lastSlash == nullptr) {
     lastSlash = str + 1;  // start past 'L'
@@ -178,13 +175,14 @@
     lastSlash++;          // start past '/'
   }
 
-  char* newStr = strdup(lastSlash);
-  newStr[strlen(lastSlash) - 1] = '\0';
-  for (char* cp = newStr; *cp != '\0'; cp++) {
-    if (*cp == '$') {
-      *cp = '.';
-    }
+  // Copy class name over, trimming trailing ';'.
+  const int targetLen = strlen(lastSlash);
+  std::unique_ptr<char[]> newStr(new char[targetLen]);
+  for (int i = 0; i < targetLen - 1; i++) {
+    const char ch = lastSlash[i];
+    newStr[i] = ch == '$' ? '.' : ch;
   }  // for
+  newStr[targetLen - 1] = '\0';
   return newStr;
 }
 
@@ -723,9 +721,8 @@
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     fprintf(gOutFile, "    #%d              : '%s'\n", i, interfaceName);
   } else {
-    char* dotted = descriptorToDot(interfaceName);
-    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dotted);
-    free(dotted);
+    std::unique_ptr<char[]> dot(descriptorToDot(interfaceName));
+    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dot.get());
   }
 }
 
@@ -777,15 +774,13 @@
 
 /*
  * Helper for dumpInstruction(), which builds the string
- * representation for the index in the given instruction. This will
- * first try to use the given buffer, but if the result won't fit,
- * then this will allocate a new buffer to hold the result. A pointer
- * to the buffer which holds the full result is always returned, and
- * this can be compared with the one passed in, to see if the result
- * needs to be free()d.
+ * representation for the index in the given instruction.
+ * Returns a pointer to a buffer of sufficient size.
  */
-static char* indexString(const DexFile* pDexFile,
-                         const Instruction* pDecInsn, char* buf, size_t bufSize) {
+static std::unique_ptr<char[]> indexString(const DexFile* pDexFile,
+                                           const Instruction* pDecInsn,
+                                           size_t bufSize) {
+  std::unique_ptr<char[]> buf(new char[bufSize]);
   // Determine index and width of the string.
   u4 index = 0;
   u4 width = 4;
@@ -821,27 +816,27 @@
     case Instruction::kIndexUnknown:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<unknown-index>");
+      outSize = snprintf(buf.get(), bufSize, "<unknown-index>");
       break;
     case Instruction::kIndexNone:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<no-index>");
+      outSize = snprintf(buf.get(), bufSize, "<no-index>");
       break;
     case Instruction::kIndexTypeRef:
       if (index < pDexFile->GetHeader().type_ids_size_) {
         const char* tp = pDexFile->StringByTypeIdx(index);
-        outSize = snprintf(buf, bufSize, "%s // type@%0*x", tp, width, index);
+        outSize = snprintf(buf.get(), bufSize, "%s // type@%0*x", tp, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<type?> // type@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<type?> // type@%0*x", width, index);
       }
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
         const char* st = pDexFile->StringDataByIdx(index);
-        outSize = snprintf(buf, bufSize, "\"%s\" // string@%0*x", st, width, index);
+        outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<string?> // string@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
       }
       break;
     case Instruction::kIndexMethodRef:
@@ -850,10 +845,10 @@
         const char* name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
         const Signature signature = pDexFile->GetMethodSignature(pMethodId);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // method@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // method@%0*x",
                            backDescriptor, name, signature.ToString().c_str(), width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<method?> // method@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<method?> // method@%0*x", width, index);
       }
       break;
     case Instruction::kIndexFieldRef:
@@ -862,38 +857,33 @@
         const char* name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
         const char* typeDescriptor = pDexFile->StringByTypeIdx(pFieldId.type_idx_);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pFieldId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // field@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // field@%0*x",
                            backDescriptor, name, typeDescriptor, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<field?> // field@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<field?> // field@%0*x", width, index);
       }
       break;
     case Instruction::kIndexVtableOffset:
-      outSize = snprintf(buf, bufSize, "[%0*x] // vtable #%0*x",
+      outSize = snprintf(buf.get(), bufSize, "[%0*x] // vtable #%0*x",
                          width, index, width, index);
       break;
     case Instruction::kIndexFieldOffset:
-      outSize = snprintf(buf, bufSize, "[obj+%0*x]", width, index);
+      outSize = snprintf(buf.get(), bufSize, "[obj+%0*x]", width, index);
       break;
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
     default:
-      outSize = snprintf(buf, bufSize, "<?>");
+      outSize = snprintf(buf.get(), bufSize, "<?>");
       break;
   }  // switch
 
   // Determine success of string construction.
   if (outSize >= bufSize) {
-    // The buffer wasn't big enough; allocate and retry. Note:
-    // snprintf() doesn't count the '\0' as part of its returned
-    // size, so we add explicit space for it here.
-    outSize++;
-    buf = reinterpret_cast<char*>(malloc(outSize));
-    if (buf == nullptr) {
-      return nullptr;
-    }
-    return indexString(pDexFile, pDecInsn, buf, outSize);
+    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
+    // doesn't count/ the '\0' as part of its returned size, so we add explicit
+    // space for it here.
+    return indexString(pDexFile, pDecInsn, outSize + 1);
   }
   return buf;
 }
@@ -941,11 +931,9 @@
   }
 
   // Set up additional argument.
-  char indexBufChars[200];
-  char *indexBuf = indexBufChars;
+  std::unique_ptr<char[]> indexBuf;
   if (Instruction::IndexTypeOf(pDecInsn->Opcode()) != Instruction::kIndexNone) {
-    indexBuf = indexString(pDexFile, pDecInsn,
-                           indexBufChars, sizeof(indexBufChars));
+    indexBuf = indexString(pDexFile, pDecInsn, 200);
   }
 
   // Dump the instruction.
@@ -1003,7 +991,7 @@
       break;
     case Instruction::k21c:        // op vAA, thing@BBBB
     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
-      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf);
+      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf.get());
       break;
     case Instruction::k23x:        // op vAA, vBB, vCC
       fprintf(gOutFile, " v%d, v%d, v%d",
@@ -1032,7 +1020,7 @@
     // NOT SUPPORTED:
     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
       fprintf(gOutFile, " v%d, v%d, %s",
-              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf);
+              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf.get());
       break;
     case Instruction::k30t:
       fprintf(gOutFile, " #%08x", pDecInsn->VRegA());
@@ -1069,7 +1057,7 @@
           fprintf(gOutFile, ", v%d", arg[i]);
         }
       }  // for
-      fprintf(gOutFile, "}, %s", indexBuf);
+      fprintf(gOutFile, "}, %s", indexBuf.get());
       break;
     }
     case Instruction::k25x: {      // op vC, {vD, vE, vF, vG} (B: count)
@@ -1101,7 +1089,7 @@
             fprintf(gOutFile, ", v%d", pDecInsn->VRegC() + i);
           }
         }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
+        fprintf(gOutFile, "}, %s", indexBuf.get());
       }
       break;
     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
@@ -1124,10 +1112,6 @@
   }  // switch
 
   fputc('\n', gOutFile);
-
-  if (indexBuf != indexBufChars) {
-    free(indexBuf);
-  }
 }
 
 /*
@@ -1141,11 +1125,9 @@
   const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
 
   // Generate header.
-  char* tmp = descriptorToDot(backDescriptor);
-  fprintf(gOutFile, "%06x:                                        "
-          "|[%06x] %s.%s:%s\n",
-          codeOffset, codeOffset, tmp, name, signature.ToString().c_str());
-  free(tmp);
+  std::unique_ptr<char[]> dot(descriptorToDot(backDescriptor));
+  fprintf(gOutFile, "%06x:                                        |[%06x] %s.%s:%s\n",
+          codeOffset, codeOffset, dot.get(), name, signature.ToString().c_str());
 
   // Iterate over all instructions.
   const u2* insns = pCode->insns_;
@@ -1224,12 +1206,10 @@
 
     // Method name and prototype.
     if (constructor) {
-      char* tmp = descriptorClassToDot(backDescriptor);
-      fprintf(gOutFile, "<constructor name=\"%s\"\n", tmp);
-      free(tmp);
-      tmp = descriptorToDot(backDescriptor);
-      fprintf(gOutFile, " type=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorClassToDot(backDescriptor));
+      fprintf(gOutFile, "<constructor name=\"%s\"\n", dot.get());
+      dot = descriptorToDot(backDescriptor);
+      fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     } else {
       fprintf(gOutFile, "<method name=\"%s\"\n", name);
       const char* returnType = strrchr(typeDescriptor, ')');
@@ -1237,9 +1217,8 @@
         fprintf(stderr, "bad method type descriptor '%s'\n", typeDescriptor);
         goto bail;
       }
-      char* tmp = descriptorToDot(returnType+1);
-      fprintf(gOutFile, " return=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorToDot(returnType + 1));
+      fprintf(gOutFile, " return=\"%s\"\n", dot.get());
       fprintf(gOutFile, " abstract=%s\n", quotedBool((flags & kAccAbstract) != 0));
       fprintf(gOutFile, " native=%s\n", quotedBool((flags & kAccNative) != 0));
       fprintf(gOutFile, " synchronized=%s\n", quotedBool(
@@ -1272,18 +1251,17 @@
         } while (*cp++ != ';');
       } else {
         // Primitive char, copy it.
-        if (strchr("ZBCSIFJD", *base) == NULL) {
+        if (strchr("ZBCSIFJD", *base) == nullptr) {
           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
-          goto bail;
+          break;  // while
         }
         *cp++ = *base++;
       }
       // Null terminate and display.
       *cp++ = '\0';
-      char* tmp = descriptorToDot(tmpBuf);
+      std::unique_ptr<char[]> dot(descriptorToDot(tmpBuf));
       fprintf(gOutFile, "<parameter name=\"arg%d\" type=\"%s\">\n"
-                        "</parameter>\n", argNum++, tmp);
-      free(tmp);
+                        "</parameter>\n", argNum++, dot.get());
     }  // while
     free(tmpBuf);
     if (constructor) {
@@ -1325,9 +1303,8 @@
     }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
-    char *tmp = descriptorToDot(typeDescriptor);
-    fprintf(gOutFile, " type=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorToDot(typeDescriptor));
+    fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     fprintf(gOutFile, " transient=%s\n", quotedBool((flags & kAccTransient) != 0));
     fprintf(gOutFile, " volatile=%s\n", quotedBool((flags & kAccVolatile) != 0));
     // The "value=" is not knowable w/o parsing annotations.
@@ -1482,13 +1459,11 @@
     }
     fprintf(gOutFile, "  Interfaces        -\n");
   } else {
-    char* tmp = descriptorClassToDot(classDescriptor);
-    fprintf(gOutFile, "<class name=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorClassToDot(classDescriptor));
+    fprintf(gOutFile, "<class name=\"%s\"\n", dot.get());
     if (superclassDescriptor != nullptr) {
-      tmp = descriptorToDot(superclassDescriptor);
-      fprintf(gOutFile, " extends=\"%s\"\n", tmp);
-      free(tmp);
+      dot = descriptorToDot(superclassDescriptor);
+      fprintf(gOutFile, " extends=\"%s\"\n", dot.get());
     }
     fprintf(gOutFile, " interface=%s\n",
             quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 6f19df5..a1bde0e 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -60,18 +60,17 @@
  * final ";" (if any) have been removed and all occurrences of '/'
  * have been changed to '.'.
  */
-static char* descriptorToDot(const char* str) {
-  size_t at = strlen(str);
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
+  size_t len = strlen(str);
   if (str[0] == 'L') {
-    at -= 2;  // Two fewer chars to copy.
-    str++;
+    len -= 2;  // Two fewer chars to copy (trims L and ;).
+    str++;     // Start past 'L'.
   }
-  char* newStr = reinterpret_cast<char*>(malloc(at + 1));
-  newStr[at] = '\0';
-  while (at > 0) {
-    at--;
-    newStr[at] = (str[at] == '/') ? '.' : str[at];
+  std::unique_ptr<char[]> newStr(new char[len + 1]);
+  for (size_t i = 0; i < len; i++) {
+    newStr[i] = (str[i] == '/') ? '.' : str[i];
   }
+  newStr[len] = '\0';
   return newStr;
 }
 
@@ -103,14 +102,13 @@
   const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(idx);
   const char* methodName = pDexFile->StringDataByIdx(pMethodId.name_idx_);
   const char* classDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-  char* className = descriptorToDot(classDescriptor);
+  std::unique_ptr<char[]> className(descriptorToDot(classDescriptor));
   const u4 insnsOff = codeOffset + 0x10;
 
   // Don't list methods that do not match a particular query.
   if (gOptions.methodToFind != nullptr &&
-      (strcmp(gOptions.classToFind, className) != 0 ||
+      (strcmp(gOptions.classToFind, className.get()) != 0 ||
        strcmp(gOptions.methodToFind, methodName) != 0)) {
-    free(className);
     return;
   }
 
@@ -130,10 +128,9 @@
   // Dump actual method information.
   fprintf(gOutFile, "0x%08x %d %s %s %s %s %d\n",
           insnsOff, pCode->insns_size_in_code_units_ * 2,
-          className, methodName, typeDesc, fileName, firstLine);
+          className.get(), methodName, typeDesc, fileName, firstLine);
 
   free(typeDesc);
-  free(className);
 }
 
 /*
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index bf563c7..d76bbb8 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -68,8 +68,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 1a3e3f5..c410cd9 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -782,23 +782,13 @@
         args << Rm;
 
         // Shift operand.
-        bool noShift = (imm5 == 0 && shift_type != 0x3);
+        bool noShift = (imm5 == 0 && shift_type == 0x0);
         if (!noShift) {
           args << ", ";
-          switch (shift_type) {
-            case 0x0: args << "lsl"; break;
-            case 0x1: args << "lsr"; break;
-            case 0x2: args << "asr"; break;
-            case 0x3:
-              if (imm5 == 0) {
-                args << "rrx";
-              } else {
-                args << "ror #" << imm5;
-              }
-              break;
-          }
-          if (shift_type != 0x3 /* rrx */) {
-            args << StringPrintf(" #%d", (0 != imm5 || 0 == shift_type) ? imm5 : 32);
+          if (shift_type == 0x3u && imm5 == 0u) {
+            args << "rrx";
+          } else {
+            args << kThumb2ShiftOperations[shift_type] << " #" << ((0 != imm5) ? imm5 : 32);
           }
         }
 
@@ -1516,82 +1506,82 @@
           }
           break;
         }
-      default:      // more formats
-        if ((op2 >> 4) == 2) {      // 010xxxx
-          // data processing (register)
-          if ((instr & 0x0080f0f0) == 0x0000f000) {
-            // LSL, LSR, ASR, ROR
-            uint32_t shift_op = (instr >> 21) & 3;
-            uint32_t S = (instr >> 20) & 1;
-            ArmRegister Rd(instr, 8);
+        default:      // more formats
+          if ((op2 >> 4) == 2) {      // 010xxxx
+            // data processing (register)
+            if ((instr & 0x0080f0f0) == 0x0000f000) {
+              // LSL, LSR, ASR, ROR
+              uint32_t shift_op = (instr >> 21) & 3;
+              uint32_t S = (instr >> 20) & 1;
+              ArmRegister Rd(instr, 8);
+              ArmRegister Rn(instr, 16);
+              ArmRegister Rm(instr, 0);
+              opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
+              args << Rd << ", " << Rn << ", " << Rm;
+            }
+          } else if ((op2 >> 3) == 6) {       // 0110xxx
+            // Multiply, multiply accumulate, and absolute difference
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0x1;
+            ArmRegister Ra(instr, 12);
             ArmRegister Rn(instr, 16);
             ArmRegister Rm(instr, 0);
-            opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
-            args << Rd << ", " << Rn << ", " << Rm;
-          }
-        } else if ((op2 >> 3) == 6) {       // 0110xxx
-          // Multiply, multiply accumulate, and absolute difference
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0x1;
-          ArmRegister Ra(instr, 12);
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          switch (op1) {
-          case 0:
-            if (op2 == 0) {
-              if (Ra.r == 0xf) {
-                opcode << "mul";
-                args << Rd << ", " << Rn << ", " << Rm;
+            ArmRegister Rd(instr, 8);
+            switch (op1) {
+            case 0:
+              if (op2 == 0) {
+                if (Ra.r == 0xf) {
+                  opcode << "mul";
+                  args << Rd << ", " << Rn << ", " << Rm;
+                } else {
+                  opcode << "mla";
+                  args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+                }
               } else {
-                opcode << "mla";
+                opcode << "mls";
                 args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
               }
-            } else {
-              opcode << "mls";
-              args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+              break;
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;        // do these sometime
             }
-            break;
-          case 1:
-          case 2:
-          case 3:
-          case 4:
-          case 5:
-          case 6:
-              break;        // do these sometime
+          } else if ((op2 >> 3) == 7) {       // 0111xxx
+            // Long multiply, long multiply accumulate, and divide
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0xf;
+            ArmRegister Rn(instr, 16);
+            ArmRegister Rm(instr, 0);
+            ArmRegister Rd(instr, 8);
+            ArmRegister RdHi(instr, 8);
+            ArmRegister RdLo(instr, 12);
+            switch (op1) {
+            case 0:
+              opcode << "smull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 1:
+              opcode << "sdiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 2:
+              opcode << "umull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 3:
+              opcode << "udiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 4:
+            case 5:
+            case 6:
+              break;      // TODO: when we generate these...
+            }
           }
-        } else if ((op2 >> 3) == 7) {       // 0111xxx
-          // Long multiply, long multiply accumulate, and divide
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0xf;
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          ArmRegister RdHi(instr, 8);
-          ArmRegister RdLo(instr, 12);
-          switch (op1) {
-          case 0:
-            opcode << "smull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 1:
-            opcode << "sdiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 2:
-            opcode << "umull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 3:
-            opcode << "udiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 4:
-          case 5:
-          case 6:
-            break;      // TODO: when we generate these...
-          }
-        }
       }
       break;
     default:
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 0a7ffda..5bb61bb 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -494,6 +494,17 @@
   image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
 }
 
+void PatchOat::PatchImTables(const ImageHeader* image_header) {
+  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  // We can safely walk target image since the conflict tables are independent.
+  image_header->VisitPackedImTables(
+      [this](ArtMethod* method) {
+        return RelocatedAddressOfPointer(method);
+      },
+      image_->Begin(),
+      pointer_size);
+}
+
 void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
   const size_t pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
@@ -636,6 +647,7 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
+  PatchImTables(image_header);
   PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
   PatchClassTable(image_header);
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 3ef837f..61ec695 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -117,6 +117,7 @@
   bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchImTables(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchImtConflictTables(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1c442fc..99c4a82 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -515,8 +515,10 @@
 
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $$(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $$(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_MULTILIB := both
   endif
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 2b025f8..90b2406 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -99,6 +99,22 @@
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
+  // Return true if two conflict tables are the same.
+  bool Equals(ImtConflictTable* other, size_t pointer_size) const {
+    size_t num = NumEntries(pointer_size);
+    if (num != other->NumEntries(pointer_size)) {
+      return false;
+    }
+    for (size_t i = 0; i < num; ++i) {
+      if (GetInterfaceMethod(i, pointer_size) != other->GetInterfaceMethod(i, pointer_size) ||
+          GetImplementationMethod(i, pointer_size) !=
+              other->GetImplementationMethod(i, pointer_size)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   // Visit all of the entries.
   // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
   // and also returns one. The order is <interface, implementation>.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 3ec8f21..cb97faa 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -857,11 +857,13 @@
     if (vtable != nullptr) {
       SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        SanityCheckArtMethod(
-            klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_spaces);
+    if (klass->ShouldHaveImt()) {
+      ImTable* imt = klass->GetImt(pointer_size);
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
+        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
       }
+    }
+    if (klass->ShouldHaveEmbeddedVTable()) {
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
         SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
@@ -3460,16 +3462,13 @@
     new_class->SetClassFlags(mirror::kClassFlagObjectArray);
   }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
-  {
-    ArtMethod* imt[mirror::Class::kImtSize];
-    std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-    new_class->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
-  }
+  new_class->PopulateEmbeddedVTable(image_pointer_size_);
+  ImTable* object_imt = java_lang_Object->GetImt(image_pointer_size_);
+  new_class->SetImt(object_imt, image_pointer_size_);
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
-
   // All arrays have java/lang/Cloneable and java/io/Serializable as
   // interfaces.  We need to set that up here, so that stuff like
   // "instanceof" works right.
@@ -5030,6 +5029,17 @@
   return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
 }
 
+static ImTable* FindSuperImt(mirror::Class* klass, size_t pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  while (klass->HasSuperClass()) {
+    klass = klass->GetSuperClass();
+    if (klass->ShouldHaveImt()) {
+      return klass->GetImt(pointer_size);
+    }
+  }
+  return nullptr;
+}
+
 bool ClassLinker::LinkClass(Thread* self,
                             const char* descriptor,
                             Handle<mirror::Class> klass,
@@ -5040,9 +5050,11 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  ArtMethod* imt[mirror::Class::kImtSize];
-  std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, imt)) {
+  ArtMethod* imt_data[ImTable::kSize];
+  // If there are any new conflicts compared to super class.
+  bool new_conflict = false;
+  std::fill_n(imt_data, arraysize(imt_data), Runtime::Current()->GetImtUnimplementedMethod());
+  if (!LinkMethods(self, klass, interfaces, &new_conflict, imt_data)) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
@@ -5055,15 +5067,47 @@
   CreateReferenceInstanceOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
+  ImTable* imt = nullptr;
+  if (klass->ShouldHaveImt()) {
+    // If there are any new conflicts compared to the super class we can not make a copy. There
+    // can be cases where both will have a conflict method at the same slot without having the same
+    // set of conflicts. In this case, we can not share the IMT since the conflict table slow path
+    // will possibly create a table that is incorrect for either of the classes.
+    // Same IMT with new_conflict does not happen very often.
+    if (!new_conflict) {
+      ImTable* super_imt = FindSuperImt(klass.Get(), image_pointer_size_);
+      if (super_imt != nullptr) {
+        bool imt_equals = true;
+        for (size_t i = 0; i < ImTable::kSize && imt_equals; ++i) {
+          imt_equals = imt_equals && (super_imt->Get(i, image_pointer_size_) == imt_data[i]);
+        }
+        if (imt_equals) {
+          imt = super_imt;
+        }
+      }
+    }
+    if (imt == nullptr) {
+      LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
+      imt = reinterpret_cast<ImTable*>(
+          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
+      if (imt == nullptr) {
+        return false;
+      }
+      imt->Populate(imt_data, image_pointer_size_);
+    }
+  }
+
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
     CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
 
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      klass->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
+    if (klass->ShouldHaveEmbeddedVTable()) {
+      klass->PopulateEmbeddedVTable(image_pointer_size_);
     }
-
+    if (klass->ShouldHaveImt()) {
+      klass->SetImt(imt, image_pointer_size_);
+    }
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5455,6 +5499,7 @@
 bool ClassLinker::LinkMethods(Thread* self,
                               Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                              bool* out_new_conflict,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
@@ -5466,7 +5511,7 @@
   // any vtable entries with new default method implementations.
   return SetupInterfaceLookupTable(self, klass, interfaces)
           && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
-          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
+          && LinkInterfaceMethods(self, klass, default_translations, out_new_conflict, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -5624,7 +5669,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     MutableHandle<mirror::PointerArray> vtable;
-    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    if (super_class->ShouldHaveEmbeddedVTable()) {
       vtable = hs.NewHandle(AllocPointerArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
@@ -6024,6 +6069,7 @@
 void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
                             ArtMethod* imt_conflict_method,
                             ArtMethod* current_method,
+                            /*out*/bool* new_conflict,
                             /*out*/ArtMethod** imt_ref) {
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
@@ -6040,40 +6086,82 @@
       *imt_ref = current_method;
     } else {
       *imt_ref = imt_conflict_method;
+      *new_conflict = true;
     }
   } else {
     // Place the default conflict method. Note that there may be an existing conflict
     // method in the IMT, but it could be one tailored to the super class, with a
     // specific ImtConflictTable.
     *imt_ref = imt_conflict_method;
+    *new_conflict = true;
   }
 }
 
 void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
-  DCHECK(klass->ShouldHaveEmbeddedImtAndVTable()) << PrettyClass(klass);
+  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
   DCHECK(!klass->IsTemp()) << PrettyClass(klass);
-  ArtMethod* imt[mirror::Class::kImtSize];
+  ArtMethod* imt_data[ImTable::kSize];
   Runtime* const runtime = Runtime::Current();
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
   ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
-  std::fill_n(imt, arraysize(imt), unimplemented_method);
+  std::fill_n(imt_data, arraysize(imt_data), unimplemented_method);
   if (klass->GetIfTable() != nullptr) {
+    bool new_conflict = false;
     FillIMTFromIfTable(klass->GetIfTable(),
                        unimplemented_method,
                        conflict_method,
                        klass,
-                       true,
-                       false,
-                       &imt[0]);
+                       /*create_conflict_tables*/true,
+                       /*ignore_copied_methods*/false,
+                       &new_conflict,
+                       &imt_data[0]);
   }
-  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-    klass->SetEmbeddedImTableEntry(i, imt[i], image_pointer_size_);
+  if (!klass->ShouldHaveImt()) {
+    return;
+  }
+  // Compare the IMT with the super class including the conflict methods. If they are equivalent,
+  // we can just use the same pointer.
+  ImTable* imt = nullptr;
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class != nullptr && super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
+    bool same = true;
+    for (size_t i = 0; same && i < ImTable::kSize; ++i) {
+      ArtMethod* method = imt_data[i];
+      ArtMethod* super_method = super_imt->Get(i, image_pointer_size_);
+      if (method != super_method) {
+        bool is_conflict_table = method->IsRuntimeMethod() &&
+                                 method != unimplemented_method &&
+                                 method != conflict_method;
+        // Verify conflict contents.
+        bool super_conflict_table = super_method->IsRuntimeMethod() &&
+                                    super_method != unimplemented_method &&
+                                    super_method != conflict_method;
+        if (!is_conflict_table || !super_conflict_table) {
+          same = false;
+        } else {
+          ImtConflictTable* table1 = method->GetImtConflictTable(image_pointer_size_);
+          ImtConflictTable* table2 = super_method->GetImtConflictTable(image_pointer_size_);
+          same = same && table1->Equals(table2, image_pointer_size_);
+        }
+      }
+    }
+    if (same) {
+      imt = super_imt;
+    }
+  }
+  if (imt == nullptr) {
+    imt = klass->GetImt(image_pointer_size_);
+    DCHECK(imt != nullptr);
+    imt->Populate(imt_data, image_pointer_size_);
+  } else {
+    klass->SetImt(imt, image_pointer_size_);
   }
 }
 
 static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+  return interface_method->GetDexMethodIndex() % ImTable::kSize;
 }
 
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
@@ -6095,8 +6183,9 @@
                                      mirror::Class* klass,
                                      bool create_conflict_tables,
                                      bool ignore_copied_methods,
-                                     ArtMethod** imt) {
-  uint32_t conflict_counts[mirror::Class::kImtSize] = {};
+                                     /*out*/bool* new_conflict,
+                                     /*out*/ArtMethod** imt) {
+  uint32_t conflict_counts[ImTable::kSize] = {};
   for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
     mirror::Class* interface = if_table->GetInterface(i);
     const size_t num_virtuals = interface->NumVirtualMethods();
@@ -6138,6 +6227,7 @@
       SetIMTRef(unimplemented_method,
                 imt_conflict_method,
                 implementation_method,
+                /*out*/new_conflict,
                 /*out*/&imt[imt_index]);
     }
   }
@@ -6145,7 +6235,7 @@
   if (create_conflict_tables) {
     // Create the conflict tables.
     LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
       size_t conflicts = conflict_counts[i];
       if (imt[i] == imt_conflict_method) {
         ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
@@ -6432,12 +6522,14 @@
 void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
                                         ArtMethod* unimplemented_method,
                                         ArtMethod* imt_conflict_method,
+                                        bool* new_conflict,
                                         ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
   mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
+  if (super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      imt[i] = super_imt->Get(i, image_pointer_size_);
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
@@ -6450,6 +6542,7 @@
                          klass.Get(),
                          /*create_conflict_table*/false,
                          /*ignore_copied_methods*/true,
+                         /*out*/new_conflict,
                          /*out*/imt);
     }
   }
@@ -6460,6 +6553,7 @@
     Thread* self,
     Handle<mirror::Class> klass,
     const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
+    bool* out_new_conflict,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
@@ -6495,6 +6589,7 @@
     FillImtFromSuperClass(klass,
                           unimplemented_method,
                           imt_conflict_method,
+                          out_new_conflict,
                           out_imt);
   }
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
@@ -6626,6 +6721,7 @@
                 SetIMTRef(unimplemented_method,
                           imt_conflict_method,
                           vtable_method,
+                          /*out*/out_new_conflict,
                           /*out*/imt_ptr);
               }
               break;
@@ -6768,6 +6864,7 @@
             SetIMTRef(unimplemented_method,
                       imt_conflict_method,
                       current_method,
+                      /*out*/out_new_conflict,
                       /*out*/imt_ptr);
           }
         }
@@ -6967,7 +7064,7 @@
       }
 
       // Fix up IMT next
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
         auto it = move_table.find(out_imt[i]);
         if (it != move_table.end()) {
           out_imt[i] = it->second;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index ca5af19..d6822c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -833,6 +833,7 @@
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                   bool* out_new_conflict,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -968,19 +969,20 @@
   // * kDefaultConflict - Conflicting method implementations were found when searching for
   //                      target_method. The value of *out_default_method is null.
   DefaultMethodSearchResult FindDefaultMethodImplementation(
-          Thread* self,
-          ArtMethod* target_method,
-          Handle<mirror::Class> klass,
-          /*out*/ArtMethod** out_default_method) const
+      Thread* self,
+      ArtMethod* target_method,
+      Handle<mirror::Class> klass,
+      /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
   bool LinkInterfaceMethods(
-          Thread* self,
-          Handle<mirror::Class> klass,
-          const std::unordered_map<size_t, MethodTranslation>& default_translations,
-          ArtMethod** out_imt)
+      Thread* self,
+      Handle<mirror::Class> klass,
+      const std::unordered_map<size_t, MethodTranslation>& default_translations,
+      bool* out_new_conflict,
+      ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -1096,6 +1098,7 @@
   void SetIMTRef(ArtMethod* unimplemented_method,
                  ArtMethod* imt_conflict_method,
                  ArtMethod* current_method,
+                 /*out*/bool* new_conflict,
                  /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillIMTFromIfTable(mirror::IfTable* if_table,
@@ -1104,11 +1107,13 @@
                           mirror::Class* klass,
                           bool create_conflict_tables,
                           bool ignore_copied_methods,
-                          ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+                          /*out*/bool* new_conflict,
+                          /*out*/ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillImtFromSuperClass(Handle<mirror::Class> klass,
                              ArtMethod* unimplemented_method,
                              ArtMethod* imt_conflict_method,
+                             bool* new_conflict,
                              ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 488826b..48b6316 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -100,6 +100,62 @@
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
   }
 
+  void AssertObjectClass(mirror::Class* JavaLangObject)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ASSERT_TRUE(JavaLangObject != nullptr);
+    ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
+    ASSERT_EQ(JavaLangObject->GetClass(),
+              JavaLangObject->GetClass()->GetClass());
+    EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
+    std::string temp;
+    ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
+    EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
+    EXPECT_FALSE(JavaLangObject->HasSuperClass());
+    EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
+    EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
+    EXPECT_FALSE(JavaLangObject->IsErroneous());
+    EXPECT_TRUE(JavaLangObject->IsLoaded());
+    EXPECT_TRUE(JavaLangObject->IsResolved());
+    EXPECT_TRUE(JavaLangObject->IsVerified());
+    EXPECT_TRUE(JavaLangObject->IsInitialized());
+    EXPECT_FALSE(JavaLangObject->IsArrayInstance());
+    EXPECT_FALSE(JavaLangObject->IsArrayClass());
+    EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
+    EXPECT_FALSE(JavaLangObject->IsInterface());
+    EXPECT_TRUE(JavaLangObject->IsPublic());
+    EXPECT_FALSE(JavaLangObject->IsFinal());
+    EXPECT_FALSE(JavaLangObject->IsPrimitive());
+    EXPECT_FALSE(JavaLangObject->IsSynthetic());
+    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
+    if (!kUseBrooksReadBarrier) {
+      EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
+    } else {
+      EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
+    }
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(),
+                 "shadow$_klass_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(),
+                 "shadow$_monitor_");
+    if (kUseBrooksReadBarrier) {
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(),
+                   "shadow$_x_rb_ptr_");
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(),
+                   "shadow$_x_xpadding_");
+    }
+
+    EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
+    EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+
+    size_t pointer_size = class_linker_->GetImagePointerSize();
+    ArtMethod* unimplemented = runtime_->GetImtUnimplementedMethod();
+    ImTable* imt = JavaLangObject->GetImt(pointer_size);
+    ASSERT_NE(nullptr, imt);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ASSERT_EQ(unimplemented, imt->Get(i, pointer_size));
+    }
+  }
+
   void AssertArrayClass(const std::string& array_descriptor,
                         const std::string& component_type,
                         mirror::ClassLoader* class_loader)
@@ -148,7 +204,8 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
+    EXPECT_TRUE(array->ShouldHaveImt());
+    EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
@@ -158,6 +215,13 @@
     EXPECT_STREQ(direct_interface1->GetDescriptor(&temp), "Ljava/io/Serializable;");
     mirror::Class* array_ptr = array->GetComponentType();
     EXPECT_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
+
+    size_t pointer_size = class_linker_->GetImagePointerSize();
+    mirror::Class* JavaLangObject =
+        class_linker_->FindSystemClass(self, "Ljava/lang/Object;");
+    ImTable* JavaLangObject_imt = JavaLangObject->GetImt(pointer_size);
+    // IMT of a array class should be shared with the IMT of the java.lag.Object
+    ASSERT_EQ(JavaLangObject_imt, array->GetImt(pointer_size));
   }
 
   void AssertMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -713,45 +777,7 @@
 TEST_F(ClassLinkerTest, FindClass) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::Class* JavaLangObject = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
-  ASSERT_TRUE(JavaLangObject != nullptr);
-  ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
-  ASSERT_EQ(JavaLangObject->GetClass(), JavaLangObject->GetClass()->GetClass());
-  EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
-  std::string temp;
-  ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
-  EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
-  EXPECT_FALSE(JavaLangObject->HasSuperClass());
-  EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
-  EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
-  EXPECT_FALSE(JavaLangObject->IsErroneous());
-  EXPECT_TRUE(JavaLangObject->IsLoaded());
-  EXPECT_TRUE(JavaLangObject->IsResolved());
-  EXPECT_TRUE(JavaLangObject->IsVerified());
-  EXPECT_TRUE(JavaLangObject->IsInitialized());
-  EXPECT_FALSE(JavaLangObject->IsArrayInstance());
-  EXPECT_FALSE(JavaLangObject->IsArrayClass());
-  EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
-  EXPECT_FALSE(JavaLangObject->IsInterface());
-  EXPECT_TRUE(JavaLangObject->IsPublic());
-  EXPECT_FALSE(JavaLangObject->IsFinal());
-  EXPECT_FALSE(JavaLangObject->IsPrimitive());
-  EXPECT_FALSE(JavaLangObject->IsSynthetic());
-  EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
-  EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBrooksReadBarrier) {
-    EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
-  } else {
-    EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
-  }
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBrooksReadBarrier) {
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
-  }
-
-  EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
-  EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+  AssertObjectClass(JavaLangObject);
 
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(
@@ -762,6 +788,7 @@
   ASSERT_TRUE(MyClass->GetClass() != nullptr);
   ASSERT_EQ(MyClass->GetClass(), MyClass->GetClass()->GetClass());
   EXPECT_EQ(JavaLangObject, MyClass->GetClass()->GetSuperClass());
+  std::string temp;
   ASSERT_STREQ(MyClass->GetDescriptor(&temp), "LMyClass;");
   EXPECT_TRUE(MyClass->GetSuperClass() == JavaLangObject);
   EXPECT_TRUE(MyClass->HasSuperClass());
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index d61d0aa..ab14655 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -600,9 +600,10 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-      ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(
-          imt_index, class_linker->GetImagePointerSize());
+      uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
+      size_t pointer_size = class_linker->GetImagePointerSize();
+      ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
+          Get(imt_index, pointer_size);
       if (!imt_method->IsRuntimeMethod()) {
         if (kIsDebugBuild) {
           mirror::Class* klass = (*this_object)->GetClass();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 923ea1a..1152b94 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2165,13 +2165,13 @@
       dex_method_idx, sizeof(void*));
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
   ArtMethod* method = nullptr;
+  ImTable* imt = cls->GetImt(sizeof(void*));
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
     uint32_t imt_index = interface_method->GetDexMethodIndex();
-    ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
-        imt_index % mirror::Class::kImtSize, sizeof(void*));
+    ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
       ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
       DCHECK(current_table != nullptr);
@@ -2223,8 +2223,7 @@
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
   uint32_t imt_index = interface_method->GetDexMethodIndex();
-  ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
-      imt_index % mirror::Class::kImtSize, sizeof(void*));
+  ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2235,9 +2234,9 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
-                                  new_conflict_method,
-                                  sizeof(void*));
+      imt->Set(imt_index % ImTable::kSize,
+               new_conflict_method,
+               sizeof(void*));
     }
   }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e896c7a..8cadc2e 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1130,6 +1130,10 @@
       image_header.VisitPackedArtFields(&field_visitor, target_base);
     }
     {
+      TimingLogger::ScopedTiming timing("Fixup imt", &logger);
+      image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
+    }
+    {
       TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
       image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
     }
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index ea75a62..cd0557a 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -20,6 +20,7 @@
 #include "image.h"
 
 #include "art_method.h"
+#include "imtable.h"
 
 namespace art {
 
@@ -45,6 +46,24 @@
 }
 
 template <typename Visitor>
+inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
+                                             uint8_t* base,
+                                             size_t pointer_size) const {
+  const ImageSection& section = GetImageSection(kSectionImTables);
+  for (size_t pos = 0; pos < section.Size();) {
+    ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ArtMethod* orig = imt->Get(i, pointer_size);
+      ArtMethod* updated = visitor(orig);
+      if (updated != orig) {
+        imt->Set(i, updated, pointer_size);
+      }
+    }
+    pos += ImTable::SizeInBytes(pointer_size);
+  }
+}
+
+template <typename Visitor>
 inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
                                                       uint8_t* base,
                                                       size_t pointer_size) const {
diff --git a/runtime/image.cc b/runtime/image.cc
index a9552c2..2362a92 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '9', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '0', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 2ea9af7..06f06ee 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -195,6 +195,7 @@
     kSectionArtFields,
     kSectionArtMethods,
     kSectionRuntimeMethods,
+    kSectionImTables,
     kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
@@ -279,6 +280,11 @@
   void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
 
   template <typename Visitor>
+  void VisitPackedImTables(const Visitor& visitor,
+                           uint8_t* base,
+                           size_t pointer_size) const;
+
+  template <typename Visitor>
   void VisitPackedImtConflictTables(const Visitor& visitor,
                                     uint8_t* base,
                                     size_t pointer_size) const;
diff --git a/runtime/imtable.h b/runtime/imtable.h
new file mode 100644
index 0000000..51faf70
--- /dev/null
+++ b/runtime/imtable.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_IMTABLE_H_
+#define ART_RUNTIME_IMTABLE_H_
+
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
+namespace art {
+
+class ArtMethod;
+
+class ImTable {
+ public:
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of classes that implement
+  // (non-marker) interfaces.
+  static constexpr size_t kSize = IMT_SIZE;
+
+  ArtMethod* Get(size_t index, size_t pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == 4) {
+      uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    } else {
+      uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    }
+  }
+
+  void Set(size_t index, ArtMethod* method, size_t pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == 4) {
+      uintptr_t value = reinterpret_cast<uintptr_t>(method);
+      DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we dont lose any non 0 bits.
+      *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
+    } else {
+      *reinterpret_cast<uint64_t*>(ptr) = reinterpret_cast<uint64_t>(method);
+    }
+  }
+
+  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
+    return index * pointer_size;
+  }
+
+  void Populate(ArtMethod** data, size_t pointer_size) {
+    for (size_t i = 0; i < kSize; ++i) {
+      Set(i, data[i], pointer_size);
+    }
+  }
+
+  constexpr static size_t SizeInBytes(size_t pointer_size) {
+    return kSize * pointer_size;
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_IMTABLE_H_
+
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 03d03d5..7dfa6e2 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -679,7 +679,7 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
       vtable_idx, sizeof(void*));
   if (UNLIKELY(called_method == nullptr)) {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index cefd9f0..b783a01 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -247,38 +247,19 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
-inline MemberOffset Class::EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size) {
-  DCHECK_LT(i, kImtSize);
-  return MemberOffset(
-      EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
-}
-
-template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
-  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
-  return GetFieldPtrWithSize<ArtMethod*>(
-      EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
-}
-
-template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
-  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
-  SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
-}
-
 inline bool Class::HasVTable() {
-  return GetVTable() != nullptr || ShouldHaveEmbeddedImtAndVTable();
+  return GetVTable() != nullptr || ShouldHaveEmbeddedVTable();
 }
 
 inline int32_t Class::GetVTableLength() {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableLength();
   }
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
 inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
   auto* vtable = GetVTable();
@@ -294,6 +275,14 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
+inline ImTable* Class::GetImt(size_t pointer_size) {
+  return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
+}
+
+inline void Class::SetImt(ImTable* imt, size_t pointer_size) {
+  return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
+}
+
 inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
@@ -541,7 +530,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
+  if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -552,7 +541,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
@@ -711,7 +700,7 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
-inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
+inline uint32_t Class::ComputeClassSize(bool has_embedded_vtable,
                                         uint32_t num_vtable_entries,
                                         uint32_t num_8bit_static_fields,
                                         uint32_t num_16bit_static_fields,
@@ -722,11 +711,10 @@
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
-  if (has_embedded_tables) {
-    const uint32_t embedded_imt_size = kImtSize * ImTableEntrySize(pointer_size);
-    const uint32_t embedded_vtable_size = num_vtable_entries * VTableEntrySize(pointer_size);
-    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */, pointer_size) +
-        embedded_imt_size + embedded_vtable_size;
+  if (has_embedded_vtable) {
+    size = RoundUp(size + sizeof(uint32_t), pointer_size);
+    size += pointer_size;  // size of pointer to IMT
+    size += num_vtable_entries * VTableEntrySize(pointer_size);
   }
 
   // Space used by reference statics.
@@ -990,18 +978,9 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
-inline MemberOffset Class::EmbeddedImTableOffset(size_t pointer_size) {
-  CheckPointerSize(pointer_size);
-  // Round up since we want the embedded imt and vtable to be pointer size aligned in case 64 bits.
-  // Add 32 bits for embedded vtable length.
-  return MemberOffset(
-      RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
-}
-
 inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MemberOffset(EmbeddedImTableOffset(pointer_size).Uint32Value() +
-                      kImtSize * ImTableEntrySize(pointer_size));
+  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + pointer_size);
 }
 
 inline void Class::CheckPointerSize(size_t pointer_size) {
@@ -1086,7 +1065,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
+  if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1094,16 +1073,9 @@
         dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
       }
     }
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
-                                                                                    pointer_size);
-      ArtMethod* new_method = visitor(method);
-      if (method != new_method) {
-        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
-                                                                        new_method,
-                                                                        pointer_size);
-      }
-    }
+  }
+  if (!IsTemp() && ShouldHaveImt<kVerifyNone, kReadBarrierOption>()) {
+    dest->SetImt(visitor(GetImt(pointer_size)), pointer_size);
   }
 }
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index b4a23ba..9c77d38 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -914,13 +914,7 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize],
-                                         size_t pointer_size) {
-  for (size_t i = 0; i < kImtSize; i++) {
-    auto method = methods[i];
-    DCHECK(method != nullptr);
-    SetEmbeddedImTableEntry(i, method, pointer_size);
-  }
+void Class::PopulateEmbeddedVTable(size_t pointer_size) {
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -967,7 +961,7 @@
 class CopyClassVisitor {
  public:
   CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ArtMethod* const (&imt)[mirror::Class::kImtSize],
+                   size_t copy_bytes, ImTable* imt,
                    size_t pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
@@ -979,7 +973,8 @@
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
     mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
-    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
+    h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
+    h_new_class_obj->SetImt(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
@@ -992,13 +987,13 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
-  ArtMethod* const (&imt_)[mirror::Class::kImtSize];
+  ImTable* imt_;
   const size_t pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
 Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ArtMethod* const (&imt)[mirror::Class::kImtSize], size_t pointer_size) {
+                     ImTable* imt, size_t pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5235a3e..f044b59 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -22,6 +22,7 @@
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
+#include "imtable.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
@@ -33,10 +34,6 @@
 #include "thread.h"
 #include "utils.h"
 
-#ifndef IMT_SIZE
-#error IMT_SIZE not defined
-#endif
-
 namespace art {
 
 class ArtField;
@@ -66,11 +63,6 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kImtSize = IMT_SIZE;
-
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -351,7 +343,7 @@
   // be replaced with a class with the right size for embedded imt/vtable.
   bool IsTemp() SHARED_REQUIRES(Locks::mutator_lock_) {
     Status s = GetStatus();
-    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedVTable();
   }
 
   String* GetName() SHARED_REQUIRES(Locks::mutator_lock_);  // Returns the cached name.
@@ -557,7 +549,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Compute how many bytes would be used a class with the given elements.
-  static uint32_t ComputeClassSize(bool has_embedded_tables,
+  static uint32_t ComputeClassSize(bool has_embedded_vtable,
                                    uint32_t num_vtable_entries,
                                    uint32_t num_8bit_static_fields,
                                    uint32_t num_16bit_static_fields,
@@ -830,28 +822,27 @@
     return MemberOffset(sizeof(Class));
   }
 
+  static MemberOffset ImtPtrOffset(size_t pointer_size) {
+    return MemberOffset(
+        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+  }
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool ShouldHaveImt() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>();
+  }
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool ShouldHaveEmbeddedVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size);
-
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
@@ -861,6 +852,10 @@
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ImTable* GetImt(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetImt(ImTable* imt, size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
   ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -870,7 +865,7 @@
   inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize], size_t pointer_size)
+  void PopulateEmbeddedVTable(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
@@ -1195,7 +1190,7 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ArtMethod* const (&imt)[mirror::Class::kImtSize],
+  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
                 size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -1322,10 +1317,7 @@
 
   // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
-
-  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
-
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 46be5e6..d4ad0ea 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -378,13 +378,13 @@
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
 
-  OatFileAssistant oat_file_assistant(filename, target_instruction_set, profile_changed, false);
+  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false);
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
     return OatFileAssistant::kNoDexOptNeeded;
   }
-  return oat_file_assistant.GetDexOptNeeded(filter);
+  return oat_file_assistant.GetDexOptNeeded(filter, profile_changed);
 }
 
 static jstring DexFile_getDexFileStatus(JNIEnv* env,
@@ -411,7 +411,6 @@
   }
 
   OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
-                                      false /* profile_changed */,
                                       false /* load_executable */);
 
   std::ostringstream status;
@@ -486,7 +485,7 @@
     return JNI_FALSE;
   }
 
-  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false);
   return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
 }
 
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1aa789f..198a52e 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -46,6 +46,16 @@
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+
+  // Even if Yama is on a non-privileged native debugger should
+  // be able to attach to the debuggable app.
+  if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
+    // if Yama is off prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
+    // case since it's expected behaviour.
+    if (errno != EINVAL) {
+      PLOG(ERROR) << "prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed for pid " << getpid();
+    }
+  }
 #endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
@@ -168,12 +178,17 @@
     // Only restart if it was streaming mode.
     // TODO: Expose buffer size, so we can also do file mode.
     if (output_mode == Trace::TraceOutputMode::kStreaming) {
-      const char* proc_name_cutils = get_process_name();
+      static constexpr size_t kMaxProcessNameLength = 100;
+      char name_buf[kMaxProcessNameLength] = {};
+      int rc = pthread_getname_np(pthread_self(), name_buf, kMaxProcessNameLength);
       std::string proc_name;
-      if (proc_name_cutils != nullptr) {
-        proc_name = proc_name_cutils;
+
+      if (rc == 0) {
+          // On success use the pthread name.
+          proc_name = name_buf;
       }
-      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+
+      if (proc_name.empty() || proc_name == "zygote" || proc_name == "zygote64") {
         // Either no process name, or the name hasn't been changed, yet. Just use pid.
         pid_t pid = getpid();
         proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 218c490..aae9d97 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -64,17 +64,15 @@
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, profile_changed, load_executable)
+    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
 { }
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const char* oat_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : isa_(isa), profile_changed_(profile_changed), load_executable_(load_executable) {
+    : isa_(isa), load_executable_(load_executable) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -89,7 +87,6 @@
   if (oat_location != nullptr) {
     cached_oat_file_name_ = std::string(oat_location);
     cached_oat_file_name_attempted_ = true;
-    cached_oat_file_name_found_ = true;
   }
 }
 
@@ -134,29 +131,43 @@
   return true;
 }
 
-bool OatFileAssistant::OatFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
+static bool GivenOatFileCompilerFilterIsOkay(const OatFile& oat_file,
+                                             CompilerFilter::Filter target,
+                                             bool profile_changed) {
+  CompilerFilter::Filter current = oat_file.GetCompilerFilter();
+
+  if (profile_changed && CompilerFilter::DependsOnProfile(current)) {
+    VLOG(oat) << "Compiler filter not okay because Profile changed";
+    return false;
+  }
+  return CompilerFilter::IsAsGoodAs(current, target);
+}
+
+bool OatFileAssistant::OatFileCompilerFilterIsOkay(CompilerFilter::Filter target,
+                                                   bool profile_changed) {
   const OatFile* oat_file = GetOatFile();
   if (oat_file != nullptr) {
-    CompilerFilter::Filter current = oat_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
+    return GivenOatFileCompilerFilterIsOkay(*oat_file, target, profile_changed);
   }
   return false;
 }
 
-bool OatFileAssistant::OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
+bool OatFileAssistant::OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target,
+                                                    bool profile_changed) {
   const OatFile* odex_file = GetOdexFile();
   if (odex_file != nullptr) {
-    CompilerFilter::Filter current = odex_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
+    return GivenOatFileCompilerFilterIsOkay(*odex_file, target, profile_changed);
   }
   return false;
 }
 
-OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target) {
+OatFileAssistant::DexOptNeeded
+OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target,
+                                  bool profile_changed) {
   bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
   // See if the oat file is in good shape as is.
-  bool oat_okay = OatFileCompilerFilterIsOkay(target);
+  bool oat_okay = OatFileCompilerFilterIsOkay(target, profile_changed);
   if (oat_okay) {
     if (compilation_desired) {
       if (OatFileIsUpToDate()) {
@@ -170,7 +181,7 @@
   }
 
   // See if the odex file is in good shape as is.
-  bool odex_okay = OdexFileCompilerFilterIsOkay(target);
+  bool odex_okay = OdexFileCompilerFilterIsOkay(target, profile_changed);
   if (odex_okay) {
     if (compilation_desired) {
       if (OdexFileIsUpToDate()) {
@@ -225,13 +236,13 @@
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::MakeUpToDate(std::string* error_msg) {
+OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) {
   CompilerFilter::Filter target;
   if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
     return kUpdateNotAttempted;
   }
 
-  switch (GetDexOptNeeded(target)) {
+  switch (GetDexOptNeeded(target, profile_changed)) {
     case kNoDexOptNeeded: return kUpdateSucceeded;
     case kDex2OatNeeded: return GenerateOatFile(error_msg);
     case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
@@ -345,15 +356,13 @@
     cached_odex_file_name_attempted_ = true;
 
     std::string error_msg;
-    cached_odex_file_name_found_ = DexFilenameToOdexFilename(
-        dex_location_, isa_, &cached_odex_file_name_, &error_msg);
-    if (!cached_odex_file_name_found_) {
+    if (!DexFilenameToOdexFilename(dex_location_, isa_, &cached_odex_file_name_, &error_msg)) {
       // If we can't figure out the odex file, we treat it as if the odex
       // file was inaccessible.
       LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
     }
   }
-  return cached_odex_file_name_found_ ? &cached_odex_file_name_ : nullptr;
+  return cached_odex_file_name_.empty() ? nullptr : &cached_odex_file_name_;
 }
 
 bool OatFileAssistant::OdexFileExists() {
@@ -361,26 +370,20 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
-  if (OdexFileIsOutOfDate()) {
-    return kOatOutOfDate;
+  if (!odex_file_status_attempted_) {
+    odex_file_status_attempted_ = true;
+    const OatFile* odex_file = GetOdexFile();
+    if (odex_file == nullptr) {
+      cached_odex_file_status_ = kOatOutOfDate;
+    } else {
+      cached_odex_file_status_ = GivenOatFileStatus(*odex_file);
+    }
   }
-  if (OdexFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return cached_odex_file_status_;
 }
 
 bool OatFileAssistant::OdexFileIsOutOfDate() {
-  if (!odex_file_is_out_of_date_attempted_) {
-    odex_file_is_out_of_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_out_of_date_ = true;
-    } else {
-      cached_odex_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_out_of_date_;
+  return OdexFileStatus() == kOatOutOfDate;
 }
 
 bool OatFileAssistant::OdexFileNeedsRelocation() {
@@ -388,16 +391,7 @@
 }
 
 bool OatFileAssistant::OdexFileIsUpToDate() {
-  if (!odex_file_is_up_to_date_attempted_) {
-    odex_file_is_up_to_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_up_to_date_ = false;
-    } else {
-      cached_odex_file_is_up_to_date_ = GivenOatFileIsUpToDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_up_to_date_;
+  return OdexFileStatus() == kOatUpToDate;
 }
 
 CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
@@ -406,7 +400,8 @@
 
   return odex_file->GetCompilerFilter();
 }
-std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
+
+static std::string ArtFileName(const OatFile* oat_file) {
   const std::string oat_file_location = oat_file->GetLocation();
   // Replace extension with .art
   const size_t last_ext = oat_file_location.find_last_of('.');
@@ -428,16 +423,15 @@
     std::string cache_dir = StringPrintf("%s%s",
         DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
     std::string error_msg;
-    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_.c_str(),
-        cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
-    if (!cached_oat_file_name_found_) {
+    if (!GetDalvikCacheFilename(dex_location_.c_str(),
+        cache_dir.c_str(), &cached_oat_file_name_, &error_msg)) {
       // If we can't determine the oat file name, we treat the oat file as
       // inaccessible.
       LOG(WARNING) << "Failed to determine oat file name for dex location "
         << dex_location_ << ": " << error_msg;
     }
   }
-  return cached_oat_file_name_found_ ? &cached_oat_file_name_ : nullptr;
+  return cached_oat_file_name_.empty() ? nullptr : &cached_oat_file_name_;
 }
 
 bool OatFileAssistant::OatFileExists() {
@@ -445,26 +439,20 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
-  if (OatFileIsOutOfDate()) {
-    return kOatOutOfDate;
+  if (!oat_file_status_attempted_) {
+    oat_file_status_attempted_ = true;
+    const OatFile* oat_file = GetOatFile();
+    if (oat_file == nullptr) {
+      cached_oat_file_status_ = kOatOutOfDate;
+    } else {
+      cached_oat_file_status_ = GivenOatFileStatus(*oat_file);
+    }
   }
-  if (OatFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return cached_oat_file_status_;
 }
 
 bool OatFileAssistant::OatFileIsOutOfDate() {
-  if (!oat_file_is_out_of_date_attempted_) {
-    oat_file_is_out_of_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_out_of_date_ = true;
-    } else {
-      cached_oat_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_out_of_date_;
+  return OatFileStatus() == kOatOutOfDate;
 }
 
 bool OatFileAssistant::OatFileNeedsRelocation() {
@@ -472,16 +460,7 @@
 }
 
 bool OatFileAssistant::OatFileIsUpToDate() {
-  if (!oat_file_is_up_to_date_attempted_) {
-    oat_file_is_up_to_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_up_to_date_ = false;
-    } else {
-      cached_oat_file_is_up_to_date_ = GivenOatFileIsUpToDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_up_to_date_;
+  return OatFileStatus() == kOatUpToDate;
 }
 
 CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
@@ -492,19 +471,6 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
-  // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
-  // is more work than we need to do. If performance becomes a concern, and
-  // this method is actually called, this should be fixed.
-  if (GivenOatFileIsOutOfDate(file)) {
-    return kOatOutOfDate;
-  }
-  if (GivenOatFileIsUpToDate(file)) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
@@ -512,7 +478,7 @@
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
       dex_location_.c_str(), dex_checksum_pointer, false);
   if (oat_dex_file == nullptr) {
-    return true;
+    return kOatOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
@@ -537,7 +503,7 @@
           << secondary_dex_location
           << ". Expected: " << expected_secondary_checksum
           << ", Actual: " << actual_secondary_checksum;
-        return true;
+        return kOatOutOfDate;
       }
     } else {
       // If we can't get the checksum for the secondary location, we assume
@@ -557,7 +523,7 @@
       VLOG(oat) << "No image for oat image checksum to match against.";
 
       if (HasOriginalDexFiles()) {
-        return true;
+        return kOatOutOfDate;
       }
 
       // If there is no original dex file to fall back to, grudgingly accept
@@ -571,45 +537,18 @@
     } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
         != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
-      return true;
+      return kOatOutOfDate;
     }
   } else {
     VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
   }
 
-  // Verify the profile hasn't changed recently.
-  // TODO: Move this check to OatFileCompilerFilterIsOkay? Nothing bad should
-  // happen if we use an oat file compiled with an out-of-date profile.
-  if (CompilerFilter::DependsOnProfile(current_compiler_filter)) {
-    if (profile_changed_) {
-      VLOG(oat) << "The profile has changed recently.";
-      return true;
-    }
-  } else {
-    VLOG(oat) << "Profile check skipped for compiler filter " << current_compiler_filter;
-  }
-
-  // Everything looks good; the dex file is not out of date.
-  return false;
-}
-
-bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
-  return GivenOatFileStatus(file) == kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
-  if (GivenOatFileIsOutOfDate(file)) {
-    return false;
-  }
-
-  CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
-
   if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
         VLOG(oat) << "No image to check oat relocation against.";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_data_begin recorded for the image in the oat file matches
@@ -621,7 +560,7 @@
           ": Oat file image oat_data_begin (" << oat_data_begin << ")"
           << " does not match actual image oat_data_begin ("
           << image_info->oat_data_begin << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_patch_delta recorded for the image in the oat file matches
@@ -632,7 +571,7 @@
           ": Oat file image patch delta (" << oat_patch_delta << ")"
           << " does not match actual image patch delta ("
           << image_info->patch_delta << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
     } else {
       // Oat files compiled in PIC mode do not require relocation.
@@ -641,7 +580,7 @@
   } else {
     VLOG(oat) << "Oat relocation test skipped for compiler filter " << current_compiler_filter;
   }
-  return true;
+  return kOatUpToDate;
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
@@ -946,8 +885,7 @@
 void OatFileAssistant::ClearOdexFileCache() {
   odex_file_load_attempted_ = false;
   cached_odex_file_.reset();
-  odex_file_is_out_of_date_attempted_ = false;
-  odex_file_is_up_to_date_attempted_ = false;
+  odex_file_status_attempted_ = false;
 }
 
 const OatFile* OatFileAssistant::GetOatFile() {
@@ -987,8 +925,7 @@
 void OatFileAssistant::ClearOatFileCache() {
   oat_file_load_attempted_ = false;
   cached_oat_file_.reset();
-  oat_file_is_out_of_date_attempted_ = false;
-  oat_file_is_up_to_date_attempted_ = false;
+  oat_file_status_attempted_ = false;
 }
 
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index d55e373..e4aba3f 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -101,14 +101,10 @@
   // device. For example, on an arm device, use arm or arm64. An oat file can
   // be loaded executable only if the ISA matches the current runtime.
   //
-  // profile_changed should be true if the profile has recently changed
-  // for this dex location.
-  //
   // load_executable should be true if the caller intends to try and load
   // executable code for this dex location.
   OatFileAssistant(const char* dex_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   // Constructs an OatFileAssistant, providing an explicit target oat_location
@@ -116,7 +112,6 @@
   OatFileAssistant(const char* dex_location,
                    const char* oat_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   ~OatFileAssistant();
@@ -145,8 +140,10 @@
 
   // Return what action needs to be taken to produce up-to-date code for this
   // dex location that is at least as good as an oat file generated with the
-  // given compiler filter.
-  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
+  // given compiler filter. profile_changed should be true to indicate the
+  // profile has recently changed for this dex location.
+  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
+                               bool profile_changed = false);
 
   // Returns true if there is up-to-date code for this dex location,
   // irrespective of the compiler filter of the up-to-date code.
@@ -164,11 +161,15 @@
 
   // Attempts to generate or relocate the oat file as needed to make it up to
   // date based on the current runtime and compiler options.
+  // profile_changed should be true to indicate the profile has recently
+  // changed for this dex location.
+  //
+  // Returns the result of attempting to update the code.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate MakeUpToDate(std::string* error_msg);
+  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed, std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -179,7 +180,7 @@
   std::unique_ptr<OatFile> GetBestOatFile();
 
   // Open and returns an image space associated with the oat file.
-  gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
+  static gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
 
   // Loads the dex files in the given oat file for the given dex location.
   // The oat file should be up to date for the given dex location.
@@ -238,15 +239,9 @@
   // |OatFileExists() == true|.
   CompilerFilter::Filter OatFileCompilerFilter();
 
-  // Return image file name. Does not cache since it relies on the oat file.
-  std::string ArtFileName(const OatFile* oat_file) const;
-
-  // These methods return the status for a given opened oat file with respect
-  // to the dex location.
+  // Return the status for a given opened oat file with respect to the dex
+  // location.
   OatStatus GivenOatFileStatus(const OatFile& file);
-  bool GivenOatFileIsOutOfDate(const OatFile& file);
-  bool GivenOatFileNeedsRelocation(const OatFile& file);
-  bool GivenOatFileIsUpToDate(const OatFile& file);
 
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
@@ -282,7 +277,8 @@
   // Constructs the odex file name for the given dex location.
   // Returns true on success, in which case odex_filename is set to the odex
   // file name.
-  // Returns false on error, in which case error_msg describes the error.
+  // Returns false on error, in which case error_msg describes the error and
+  // odex_filename is not changed.
   // Neither odex_filename nor error_msg may be null.
   static bool DexFilenameToOdexFilename(const std::string& location,
       InstructionSet isa, std::string* odex_filename, std::string* error_msg);
@@ -324,8 +320,9 @@
   const OatFile* GetOdexFile();
 
   // Returns true if the compiler filter used to generate the odex file is at
-  // least as good as the given target filter.
-  bool OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target);
+  // least as good as the given target filter. profile_changed should be true
+  // to indicate the profile has recently changed for this dex location.
+  bool OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
 
   // Returns true if the odex file is opened executable.
   bool OdexFileIsExecutable();
@@ -343,8 +340,9 @@
   const OatFile* GetOatFile();
 
   // Returns true if the compiler filter used to generate the oat file is at
-  // least as good as the given target filter.
-  bool OatFileCompilerFilterIsOkay(CompilerFilter::Filter target);
+  // least as good as the given target filter. profile_changed should be true
+  // to indicate the profile has recently changed for this dex location.
+  bool OatFileCompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
 
   // Returns true if the oat file is opened executable.
   bool OatFileIsExecutable();
@@ -375,9 +373,6 @@
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
 
-  // Whether the profile has recently changed.
-  bool profile_changed_ = false;
-
   // Whether we will attempt to load oat files executable.
   bool load_executable_ = false;
 
@@ -390,8 +385,9 @@
 
   // Cached value of the odex file name.
   // This should be accessed only by the OdexFileName() method.
+  // The sentinel value "" is used if the odex file name could not be
+  // determined.
   bool cached_odex_file_name_attempted_ = false;
-  bool cached_odex_file_name_found_;
   std::string cached_odex_file_name_;
 
   // Cached value of the loaded odex file.
@@ -400,18 +396,15 @@
   bool odex_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_odex_file_;
 
-  // Cached results for OdexFileIsOutOfDate
-  bool odex_file_is_out_of_date_attempted_ = false;
-  bool cached_odex_file_is_out_of_date_;
-
-  // Cached results for OdexFileIsUpToDate
-  bool odex_file_is_up_to_date_attempted_ = false;
-  bool cached_odex_file_is_up_to_date_;
+  // Cached results for OdexFileStatus
+  bool odex_file_status_attempted_ = false;
+  OatStatus cached_odex_file_status_;
 
   // Cached value of the oat file name.
   // This should be accessed only by the OatFileName() method.
+  // The sentinel value "" is used if the oat file name could not be
+  // determined.
   bool cached_oat_file_name_attempted_ = false;
-  bool cached_oat_file_name_found_;
   std::string cached_oat_file_name_;
 
   // Cached value of the loaded oat file.
@@ -420,13 +413,9 @@
   bool oat_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_oat_file_;
 
-  // Cached results for OatFileIsOutOfDate
-  bool oat_file_is_out_of_date_attempted_ = false;
-  bool cached_oat_file_is_out_of_date_;
-
-  // Cached results for OatFileIsUpToDate
-  bool oat_file_is_up_to_date_attempted_ = false;
-  bool cached_oat_file_is_up_to_date_;
+  // Cached results for OatFileStatus
+  bool oat_file_status_attempted_ = false;
+  OatStatus cached_oat_file_status_;
 
   // Cached value of the image info.
   // Use the GetImageInfo method rather than accessing these directly.
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index a1d3ed9..6bccea6 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -213,7 +213,7 @@
 // generation of oat files.
 static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
   // Use an oat file assistant to find the proper oat location.
-  OatFileAssistant ofa(dex_location, kRuntimeISA, false, false);
+  OatFileAssistant ofa(dex_location, kRuntimeISA, false);
   const std::string* oat_location = ofa.OatFileName();
   ASSERT_TRUE(oat_location != nullptr);
 
@@ -245,7 +245,7 @@
   std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -275,7 +275,7 @@
 TEST_F(OatFileAssistantTest, NoDexNoOat) {
   std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -283,7 +283,7 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(&error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -297,7 +297,7 @@
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -321,18 +321,23 @@
 }
 
 // Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDexOptNeeded if the profile hasn't changed.
+// Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
+// kDex2Oat if the profile has changed.
 TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
   std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -346,32 +351,6 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
-// Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDex2OatNeeded if the profile has changed.
-TEST_F(OatFileAssistantTest, ProfileOatOutOfDate) {
-  std::string dex_location = GetScratchDir() + "/ProfileOatOutOfDate.jar";
-  Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
-
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true, false);
-
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
@@ -379,9 +358,9 @@
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load both dex files.
@@ -406,9 +385,9 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -435,7 +414,7 @@
   // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
                                       oat_location.c_str(),
-                                      kRuntimeISA, false, true);
+                                      kRuntimeISA, true);
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -455,7 +434,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -482,7 +461,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -518,7 +497,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -536,7 +515,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -577,7 +556,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -600,7 +579,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -635,7 +614,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -658,7 +637,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -686,7 +665,7 @@
   GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
@@ -710,7 +689,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -746,7 +725,7 @@
   GenerateNoPatchOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -755,7 +734,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -785,7 +764,7 @@
 
   // Verify things don't go bad.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -820,7 +799,7 @@
   GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -848,7 +827,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -874,7 +853,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -893,7 +872,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -912,7 +891,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -932,11 +911,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -948,7 +927,7 @@
   EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
 
   // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
@@ -964,11 +943,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() == nullptr);
@@ -981,7 +960,7 @@
   std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
@@ -1031,7 +1010,7 @@
   Copy(GetDexSrc1(), abs_dex_location);
 
   std::string dex_location = MakePathRelative(abs_dex_location);
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -1049,7 +1028,7 @@
 TEST_F(OatFileAssistantTest, ShortDexLocation) {
   std::string dex_location = "/xx";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1066,7 +1045,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -1076,7 +1055,7 @@
   std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -1173,7 +1152,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1195,7 +1174,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1209,12 +1188,12 @@
   std::string dex_location = GetScratchDir() + "/RuntimeCompilerFilterOptionUsed.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -1222,7 +1201,7 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1230,7 +1209,7 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
 }
 
 TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index b7e6040..7680517 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -558,7 +558,6 @@
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
                                       kRuntimeISA,
-                                      /*profile_changed*/false,
                                       !runtime->IsAotCompiler());
 
   // Lock the target oat location to avoid races generating and loading the
@@ -576,7 +575,7 @@
     // Update the oat file on disk if we can, based on the --compiler-filter
     // option derived from the current runtime options.
     // This may fail, but that's okay. Best effort is all that matters here.
-    switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
+    switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/false, /*out*/ &error_msg)) {
       case OatFileAssistant::kUpdateFailed:
         LOG(WARNING) << error_msg;
         break;
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index 5c71da6..ad91cde 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -65,8 +65,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index bf561e9..06f193a 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -808,11 +808,21 @@
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(Math.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(Math.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(Math.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(Math.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(Math.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(Math.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -846,6 +856,10 @@
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(Math.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
@@ -1153,11 +1167,21 @@
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
     Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(StrictMath.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(StrictMath.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(StrictMath.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(StrictMath.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(StrictMath.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(StrictMath.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(StrictMath.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -1191,6 +1215,10 @@
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(StrictMath.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 11de660..2b77b29 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -12,7 +12,6 @@
 JNI_OnUnload called
 null
 loader null false
-loader null false
 JNI_OnLoad called
 JNI_OnUnload called
 null
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 17a6049..9ed8d28 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -37,8 +37,6 @@
         try {
             testUnloadClass(constructor);
             testUnloadLoader(constructor);
-            // Test that we don't unload if we have a Method keeping the class live.
-            testNoUnloadInvoke(constructor);
             // Test that we don't unload if we have an instance.
             testNoUnloadInstance(constructor);
             // Test JNI_OnLoad and JNI_OnUnload.
@@ -79,10 +77,10 @@
     }
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
+        WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
-        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        WeakReference<Class> klass2 = setUpUnloadClassWeak(constructor);
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(klass.get());
@@ -99,12 +97,14 @@
     }
 
     private static void testStackTrace(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
-        Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace");
-        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get());
+        Class klass = setUpUnloadClass(constructor);
+        WeakReference<Class> weak_klass = new WeakReference(klass);
+        Method stackTraceMethod = klass.getDeclaredMethod("generateStackTrace");
+        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
         stackTraceMethod = null;
+        klass = null;
         Runtime.getRuntime().gc();
-        boolean isNull = klass.get() == null;
+        boolean isNull = weak_klass.get() == null;
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
 
@@ -116,28 +116,37 @@
         System.out.println(loader.get());
     }
 
-    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
+    private static Object testNoUnloadHelper(ClassLoader loader) throws Exception {
+        Class intHolder = loader.loadClass("IntHolder");
+        return intHolder.newInstance();
+    }
+
+    static class Pair {
+      public Pair(Object o, ClassLoader l) {
+        object = o;
+        classLoader = new WeakReference<ClassLoader>(l);
+      }
+
+      public Object object;
+      public WeakReference<ClassLoader> classLoader;
+    }
+
+    private static Pair testNoUnloadInstanceHelper(Constructor constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Object o = testNoUnloadHelper(loader);
+        return new Pair(o, loader);
     }
 
     private static void testNoUnloadInstance(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        Object o = intHolder.get().newInstance();
+        Pair p = testNoUnloadInstanceHelper(constructor);
         Runtime.getRuntime().gc();
-        boolean isNull = loader.get() == null;
+        // If the class loader was unloded too early due to races, just pass the test.
+        boolean isNull = p.classLoader.get() == null;
         System.out.println("loader null " + isNull);
     }
 
-    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+    private static Class setUpUnloadClass(Constructor constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Class intHolder = loader.loadClass("IntHolder");
@@ -149,7 +158,12 @@
         setValue.invoke(intHolder, 2);
         System.out.println((int) getValue.invoke(intHolder));
         waitForCompilation(intHolder);
-        return new WeakReference(intHolder);
+        return intHolder;
+    }
+
+    private static WeakReference<Class> setUpUnloadClassWeak(Constructor constructor)
+            throws Exception {
+        return new WeakReference<Class>(setUpUnloadClass(constructor));
     }
 
     private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c717eaa..359d521 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1971,8 +1971,165 @@
     return (value >> temp) + temp;
   }
 
-public static void main(String[] args) {
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Y>>]
+
+  public static int $noinline$intAddSubSimplifyArg1(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intAddSubSimplifyArg2(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyLeft(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyRight(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return y + sub;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg1(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg2(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyLeft(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyRight(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return y + sub;
+  }
+
+ public static void main(String[] args) {
     int arg = 123456;
+    float floatArg = 123456.125f;
 
     assertLongEquals(arg, $noinline$Add0(arg));
     assertIntEquals(5, $noinline$AddAddSubAddConst(1));
@@ -2143,6 +2300,15 @@
     assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
+
+    assertIntEquals(654321, $noinline$intAddSubSimplifyArg1(arg, 654321));
+    assertIntEquals(arg, $noinline$intAddSubSimplifyArg2(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyLeft(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyRight(arg, 654321));
+    assertFloatEquals(654321.125f, $noinline$floatAddSubSimplifyArg1(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatAddSubSimplifyArg2(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyLeft(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyRight(floatArg, 654321.125f));
   }
 
   private static boolean $inline$true() { return true; }
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
index c0d93dd..9f4c6c9 100644
--- a/test/566-polymorphic-inlining/polymorphic_inline.cc
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -81,6 +81,7 @@
 
   do_checks(cls, "testInvokeVirtual");
   do_checks(cls, "testInvokeInterface");
+  do_checks(cls, "testInvokeInterface2");
   do_checks(cls, "$noinline$testInlineToSameTarget");
 }
 
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index d39e6ed..53852a4 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -16,6 +16,8 @@
 
 interface Itf {
   public Class sameInvokeInterface();
+  public Class sameInvokeInterface2();
+  public Class sameInvokeInterface3();
 }
 
 public class Main implements Itf {
@@ -50,6 +52,8 @@
       testInvokeVirtual(mains[1]);
       testInvokeInterface(itfs[0]);
       testInvokeInterface(itfs[1]);
+      testInvokeInterface2(itfs[0]);
+      testInvokeInterface2(itfs[1]);
       $noinline$testInlineToSameTarget(mains[0]);
       $noinline$testInlineToSameTarget(mains[1]);
     }
@@ -64,9 +68,13 @@
     assertEquals(Itf.class, testInvokeInterface(itfs[0]));
     assertEquals(Itf.class, testInvokeInterface(itfs[1]));
 
+    assertEquals(Itf.class, testInvokeInterface2(itfs[0]));
+    assertEquals(Itf.class, testInvokeInterface2(itfs[1]));
+
     // This will trigger a deoptimization of the compiled code.
     assertEquals(OtherSubclass.class, testInvokeVirtual(mains[2]));
     assertEquals(OtherSubclass.class, testInvokeInterface(itfs[2]));
+    assertEquals(null, testInvokeInterface2(itfs[2]));
 
     // Run this once to make sure we execute the JITted code.
     $noinline$testInlineToSameTarget(mains[0]);
@@ -83,10 +91,28 @@
     return Itf.class;
   }
 
+  public Class sameInvokeInterface2() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public Class sameInvokeInterface3() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
   public static Class testInvokeInterface(Itf i) {
     return i.sameInvokeInterface();
   }
 
+  public static Class testInvokeInterface2(Itf i) {
+    // Make three interface calls that will do a ClassTableGet to ensure bogus code
+    // generation of ClassTableGet will crash.
+    i.sameInvokeInterface();
+    i.sameInvokeInterface2();
+    return i.sameInvokeInterface3();
+  }
+
   public static Class testInvokeVirtual(Main m) {
     return m.sameInvokeVirtual();
   }
@@ -120,4 +146,11 @@
   public Class sameInvokeInterface() {
     return OtherSubclass.class;
   }
+
+  public Class sameInvokeInterface2() {
+    return null;
+  }
+  public Class sameInvokeInterface3() {
+    return null;
+  }
 }
diff --git a/test/609-checker-x86-bounds-check/expected.txt b/test/609-checker-x86-bounds-check/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/609-checker-x86-bounds-check/info.txt b/test/609-checker-x86-bounds-check/info.txt
new file mode 100644
index 0000000..c0f26d0
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/info.txt
@@ -0,0 +1 @@
+Checker test that we combine ArrayLength and BoundsCheck on x86/x86_64.
diff --git a/test/609-checker-x86-bounds-check/src/Main.java b/test/609-checker-x86-bounds-check/src/Main.java
new file mode 100644
index 0000000..bfc2be8
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/src/Main.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    int[] array = new int[51];
+    testArrayLengthBoundsCheckX86(array, 10);
+
+    System.out.println("passed");
+  }
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], <<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + <<IndexReg>> * 4 + 12], 9
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  // Test assumes parameter value is in lower 8 registers (it is passed in edx).
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], e<<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + r<<IndexReg>> * 4 + 12], 9
+
+  static void testArrayLengthBoundsCheckX86(int[] array, int index) {
+    array[index] = 9;
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 11150c2..8473e06 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -67,4 +67,6 @@
 b/27799205 (5)
 b/27799205 (6)
 b/28187158
+b/29778499 (1)
+b/29778499 (2)
 Done!
diff --git a/test/800-smali/smali/b_29778499_1.smali b/test/800-smali/smali/b_29778499_1.smali
new file mode 100644
index 0000000..6cc0731
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_1.smali
@@ -0,0 +1,19 @@
+.class public LB29778499_1;
+.super Ljava/lang/Object;
+
+# Test returning an object that doesn't implement the declared output interface.
+
+.method public static run()V
+.registers 2
+       invoke-static {}, LB29778499_1;->test()Ljava/lang/Runnable;
+       move-result-object v0
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
+
+.method public static test()Ljava/lang/Runnable;
+.registers 1
+       new-instance v0, LB29778499_1;
+       invoke-direct {v0}, LB29778499_1;-><init>()V
+       return-object v0
+.end method
diff --git a/test/800-smali/smali/b_29778499_2.smali b/test/800-smali/smali/b_29778499_2.smali
new file mode 100644
index 0000000..ad24d2f
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_2.smali
@@ -0,0 +1,13 @@
+.class public LB29778499_2;
+.super Ljava/lang/Object;
+
+# Test invoking an interface method on an object that doesn't implement any interface.
+# This is testing an edge case (not implementing any interface) for b/18116999.
+
+.method public static run()V
+.registers 1
+       new-instance v0, Ljava/lang/Object;
+       invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index b2fc005..bf50879 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -176,6 +176,10 @@
         testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
         testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null },
                 new VerifyError(), null));
+        testCases.add(new TestCase("b/29778499 (1)", "B29778499_1", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/29778499 (2)", "B29778499_2", "run", null,
+                new IncompatibleClassChangeError(), null));
     }
 
     public void runTests() {
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 01790ae..75e74ec 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -102,12 +102,14 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     ifeq ($$(suffix),d)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk
index e8cc7e4..992332e 100644
--- a/test/Android.libnativebridgetest.mk
+++ b/test/Android.libnativebridgetest.mk
@@ -60,7 +60,7 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES := libcutils
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     ifeq ($(HOST_OS),linux)
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index f25fb98..996f2f8 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -253,5 +253,12 @@
   names: ["jsr166.CollectionTest#testEmptyMeansEmpty",
           "jsr166.Collection8Test#testForEach",
           "jsr166.Collection8Test#testForEachConcurrentStressTest"]
+},
+{
+  description: "Flaky test",
+  result: EXEC_FAILED,
+  bug: 30107038,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
 }
 ]