Merge "Remove unused headers."
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 523d143..27d12bd 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -44,13 +44,14 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
-  ifeq ($(1),default)
-    core_compile_options += --compiler-backend=Quick
+  # With the optimizing compiler, we want to rerun dex2oat whenever there is
+  # a dex2oat change to catch regressions early.
+  ifeq ($(ART_USE_OPTIMIZING_COMPILER), true)
+    core_dex2oat_dependency := $(DEX2OAT)
   endif
+
   ifeq ($(1),optimizing)
     core_compile_options += --compiler-backend=Optimizing
-    # With the optimizing compiler, we want to rerun dex2oat whenever there is
-    # a dex2oat change to catch regressions early.
     core_dex2oat_dependency := $(DEX2OAT)
     core_infix := -optimizing
   endif
@@ -140,18 +141,22 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
-  ifeq ($(1),default)
-    core_compile_options += --compiler-backend=Quick
-  endif
-  ifeq ($(1),optimizing)
+  # With the optimizing compiler, we want to rerun dex2oat whenever there is
+  # a dex2oat change to catch regressions early.
+  ifeq ($(ART_USE_OPTIMIZING_COMPILER), true)
+    core_dex2oat_dependency := $(DEX2OAT)
     ifeq ($($(3)TARGET_ARCH),arm64)
       # TODO: Enable image generation on arm64 once the backend
       # is on par with other architectures.
-      core_compile_options += --compiler-backend=Quick
+      core_compile_options += --compiler-filter=interpret-only
+    endif
+  endif
+
+  ifeq ($(1),optimizing)
+    ifeq ($($(3)TARGET_ARCH),arm64)
+      core_compile_options += --compiler-filter=interpret-only
     else
       core_compile_options += --compiler-backend=Optimizing
-      # With the optimizing compiler, we want to rerun dex2oat whenever there is
-      # a dex2oat change to catch regressions early.
       core_dex2oat_dependency := $(DEX2OAT)
     endif
     core_infix := -optimizing
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 117d8f0..36d065f 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -156,6 +156,13 @@
     case Instruction::USHR_INT:
     case Instruction::USHR_INT_2ADDR:
       return true;
+    case Instruction::CONST:
+    case Instruction::CONST_4:
+    case Instruction::CONST_16:
+      if ((value >> 16) == 0) {
+        return true;  // movw, 16-bit unsigned.
+      }
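+      // Larger values fall through to the modified-immediate checks shared with the logical
+      // ops below; anything else would need a movw+movt pair.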
+      FALLTHROUGH_INTENDED;
     case Instruction::AND_INT:
     case Instruction::AND_INT_2ADDR:
     case Instruction::AND_INT_LIT16:
@@ -899,12 +906,12 @@
  */
 LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                   OpSize size) {
-  LIR* load = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  LIR* load = nullptr;
+  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
+  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
-  int encoded_disp = displacement;
+  int scale = 0;  // Used for opcode16 and some indexed loads.
   bool already_generated = false;
   switch (size) {
     case kDouble:
@@ -932,57 +939,45 @@
         already_generated = true;
         break;
       }
+      DCHECK_EQ((displacement & 0x3), 0);
+      scale = 2;
       if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
           (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrPcRel;
+        opcode16 = kThumbLdrPcRel;
       } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
                  (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrRRI12;
+        opcode16 = kThumbLdrSpRel;
+      } else {
+        short_form = all_low && (displacement >> (5 + scale)) == 0;
+        opcode16 = kThumbLdrRRI5;
+        opcode32 = kThumb2LdrRRI12;
       }
       break;
     case kUnsignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kThumbLdrhRRI5;
-      } else if (displacement < 4092 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumb2LdrhRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbLdrhRRI5;
+      opcode32 = kThumb2LdrhRRI12;
       break;
     case kSignedHalf:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrshRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
+      opcode32 = kThumb2LdrshRRI12;
       break;
     case kUnsignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbLdrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbLdrbRRI5;
+      opcode32 = kThumb2LdrbRRI12;
       break;
     case kSignedByte:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrsbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
+      opcode32 = kThumb2LdrsbRRI12;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
@@ -990,12 +985,33 @@
 
   if (!already_generated) {
     if (short_form) {
-      load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+      load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
+    } else if ((displacement >> 12) == 0) {  // Thumb2 form.
+      load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
+    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+      // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
+      // actually do it in two because we know that the kOpAdd is a single insn. On the
+      // other hand, we introduce an extra dependency, so this is not necessarily faster.
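+      // For example, displacement 0xff234 for a byte load becomes ADD r_dest, r_base, #0xff000
+      // followed by LDRB r_dest, [r_dest, #0x234].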
+      if (opcode16 != kThumbBkpt && r_dest.Low8() &&
+          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+        // We can use the 16-bit Thumb opcode for the load.
+        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
+        load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
+      } else {
+        DCHECK_NE(opcode32, kThumbBkpt);
+        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
+        load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
+      }
     } else {
+      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+        scale = 0;  // Prefer unscaled indexing if the same number of insns.
+      }
       RegStorage reg_offset = AllocTemp();
-      LoadConstant(reg_offset, encoded_disp);
+      LoadConstant(reg_offset, displacement >> scale);
       DCHECK(!r_dest.IsFloat());
-      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
       FreeTemp(reg_offset);
     }
   }
@@ -1041,12 +1057,12 @@
 
 LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                    OpSize size) {
-  LIR* store = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  LIR* store = nullptr;
+  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
+  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
-  int encoded_disp = displacement;
+  int scale = 0;  // Used for opcode16 and some indexed stores.
   bool already_generated = false;
   switch (size) {
     case kDouble:
@@ -1078,53 +1094,67 @@
         already_generated = true;
         break;
       }
+      DCHECK_EQ((displacement & 0x3), 0);
+      scale = 2;
       if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbStrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbStrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrRRI12;
+        opcode16 = kThumbStrSpRel;
+      } else {
+        short_form = all_low && (displacement >> (5 + scale)) == 0;
+        opcode16 = kThumbStrRRI5;
+        opcode32 = kThumb2StrRRI12;
       }
       break;
     case kUnsignedHalf:
     case kSignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kThumbStrhRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrhRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbStrhRRI5;
+      opcode32 = kThumb2StrhRRI12;
       break;
     case kUnsignedByte:
     case kSignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbStrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbStrbRRI5;
+      opcode32 = kThumb2StrbRRI12;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
   if (!already_generated) {
     if (short_form) {
-      store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
-    } else {
+      store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
+    } else if ((displacement >> 12) == 0) {
+      store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
+    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+      // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
+      // actually do it in two because we know that the kOpAdd is a single insn. On the
+      // other hand, we introduce an extra dependency, so this is not necessarily faster.
       RegStorage r_scratch = AllocTemp();
-      LoadConstant(r_scratch, encoded_disp);
+      if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
+          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+        // We can use the 16-bit Thumb opcode for the store.
+        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
+        store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
+                        (displacement >> scale) & 0x1f);
+      } else {
+        DCHECK_NE(opcode32, kThumbBkpt);
+        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
+        store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
+      }
+      FreeTemp(r_scratch);
+    } else {
+      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+        scale = 0;  // Prefer unscaled indexing if the same number of insns.
+      }
+      RegStorage r_scratch = AllocTemp();
+      LoadConstant(r_scratch, displacement >> scale);
       DCHECK(!r_src.IsFloat());
-      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
       FreeTemp(r_scratch);
     }
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 58bcee2..0021754 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -775,6 +775,10 @@
         ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
     native_gc_map_builder.AddEntry(native_offset, references);
   }
+
+  // Maybe not necessary, but this could help prevent errors where we access the verified method
+  // after it has been deleted.
+  mir_graph_->GetCurrentDexCompilationUnit()->ClearVerifiedMethod();
 }
 
 /* Determine the offset of each literal field */
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 4929b5b..932a532 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -84,6 +84,15 @@
   return (it != verified_methods_.end()) ? it->second : nullptr;
 }
 
+void VerificationResults::RemoveVerifiedMethod(MethodReference ref) {
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  if (it != verified_methods_.end()) {
+    delete it->second;
+    verified_methods_.erase(it);
+  }
+}
+
 void VerificationResults::AddRejectedClass(ClassReference ref) {
   {
     WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 0e7923f..7fc2a23 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -48,6 +48,7 @@
 
     const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
         LOCKS_EXCLUDED(verified_methods_lock_);
+    void RemoveVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_);
 
     void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
     bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ab9f41a..e427471 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -23,6 +23,10 @@
 #include <vector>
 #include <unistd.h>
 
+#ifndef __APPLE__
+#include <malloc.h>  // For mallinfo
+#endif
+
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
@@ -497,6 +501,7 @@
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+  VLOG(compiler) << "Before precompile " << GetMemoryUsageString();
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
   if (dump_stats_) {
@@ -593,20 +598,25 @@
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                                 ThreadPool* thread_pool, TimingLogger* timings) {
   LoadImageClasses(timings);
+  VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString();
 
   Resolve(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "Resolve: " << GetMemoryUsageString();
 
   if (!compiler_options_->IsVerificationEnabled()) {
-    LOG(INFO) << "Verify none mode specified, skipping verification.";
+    VLOG(compiler) << "Verify none mode specified, skipping verification.";
     SetVerified(class_loader, dex_files, thread_pool, timings);
     return;
   }
 
   Verify(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "Verify: " << GetMemoryUsageString();
 
   InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString();
 
   UpdateImageClasses(timings);
+  VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString();
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
@@ -2002,6 +2012,7 @@
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
+  VLOG(compiler) << "Compile: " << GetMemoryUsageString();
 }
 
 void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, size_t class_def_index) {
@@ -2128,6 +2139,7 @@
                                    bool compilation_enabled) {
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
+  MethodReference method_ref(&dex_file, method_idx);
 
   if ((access_flags & kAccNative) != 0) {
     // Are we interpreting only and have support for generic JNI down calls?
@@ -2141,7 +2153,6 @@
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
   } else {
-    MethodReference method_ref(&dex_file, method_idx);
     bool compile = compilation_enabled &&
                    verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
@@ -2178,16 +2189,18 @@
     // When compiling with PIC, there should be zero non-relative linker patches
     CHECK(!compile_pic || non_relative_linker_patch_count == 0u);
 
-    MethodReference ref(&dex_file, method_idx);
-    DCHECK(GetCompiledMethod(ref) == nullptr) << PrettyMethod(method_idx, dex_file);
+    DCHECK(GetCompiledMethod(method_ref) == nullptr) << PrettyMethod(method_idx, dex_file);
     {
       MutexLock mu(self, compiled_methods_lock_);
-      compiled_methods_.Put(ref, compiled_method);
+      compiled_methods_.Put(method_ref, compiled_method);
       non_relative_linker_patch_count_ += non_relative_linker_patch_count;
     }
-    DCHECK(GetCompiledMethod(ref) != nullptr) << PrettyMethod(method_idx, dex_file);
+    DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file);
   }
 
+  // Done compiling, delete the verified method to reduce native memory usage.
+  verification_results_->RemoveVerifiedMethod(method_ref);
+
   if (self->IsExceptionPending()) {
     ScopedObjectAccess soa(self);
     LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
@@ -2337,4 +2350,21 @@
   }
   return !compile;
 }
+
+std::string CompilerDriver::GetMemoryUsageString() const {
+  std::ostringstream oss;
+  const ArenaPool* arena_pool = GetArenaPool();
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
+  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+#ifdef HAVE_MALLOC_H
+  struct mallinfo info = mallinfo();
+  const size_t allocated_space = static_cast<size_t>(info.uordblks);
+  const size_t free_space = static_cast<size_t>(info.fordblks);
+  oss << " native alloc=" << PrettySize(allocated_space) << " free="
+      << PrettySize(free_space);
+#endif
+  return oss.str();
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d837dbc..615e0d0 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,7 @@
 #include "thread_pool.h"
 #include "utils/arena_allocator.h"
 #include "utils/dedupe_set.h"
+#include "dex/verified_method.h"
 
 namespace art {
 
@@ -398,6 +399,9 @@
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
+  // Get memory usage during compilation.
+  std::string GetMemoryUsageString() const;
+
  private:
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 84f5799..03ae489 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -102,6 +102,10 @@
     return verified_method_;
   }
 
+  void ClearVerifiedMethod() {
+    verified_method_ = nullptr;
+  }
+
   const std::string& GetSymbol();
 
  private:
@@ -117,7 +121,7 @@
   const uint16_t class_def_idx_;
   const uint32_t dex_method_idx_;
   const uint32_t access_flags_;
-  const VerifiedMethod* const verified_method_;
+  const VerifiedMethod* verified_method_;
 
   std::string symbol_;
 };
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index 2ffbd10..5488e2f 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -46,7 +46,11 @@
     EXPECT_EQ(expected_value, ef->FindDynamicSymbolAddress(symbol_name)); \
   } while (false)
 
+#if defined(ART_USE_OPTIMIZING_COMPILER)
+TEST_F(ElfWriterTest, DISABLED_dlsym) {
+#else
 TEST_F(ElfWriterTest, dlsym) {
+#endif
   std::string elf_location;
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3b1d914..ab5c6c7 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -77,6 +77,7 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
+    ProcessStrings();
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -561,9 +562,9 @@
     bool is_prefix = false;
     if (it != existing_strings.end()) {
       CHECK_LE(length, it->second);
-      is_prefix = std::equal(combined_chars.begin() + it->first,
-                             combined_chars.begin() + it->first + it->second,
-                             combined_chars.begin() + new_string.first);
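+      // Check whether the new (shorter or equal-length) string matches the start of the
+      // existing string.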
+      is_prefix = std::equal(combined_chars.begin() + new_string.first,
+                             combined_chars.begin() + new_string.first + new_string.second,
+                             combined_chars.begin() + it->first);
     }
     if (is_prefix) {
       // Shares a prefix, set the offset to where the new offset will be.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index eb6181c..76efef0 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -16,6 +16,7 @@
 
 #include "builder.h"
 
+#include "base/logging.h"
 #include "class_linker.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
@@ -68,6 +69,53 @@
   size_t index_;
 };
 
+class SwitchTable : public ValueObject {
+ public:
+  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
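+    // Payload layout: table[0] is the ident, table[1] the number of entries; then a packed
+    // switch has the 32-bit starting key followed by 32-bit relative branch targets, while a
+    // sparse switch has the 32-bit keys followed by the 32-bit targets.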
+    if (sparse) {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+    } else {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    }
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    DCHECK_LE(index, static_cast<size_t>(sparse_ ? num_entries_ - 1 : num_entries_));
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    DCHECK_LE(index, static_cast<size_t>(sparse_ ? num_entries_ - 1 : num_entries_));
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This can't be const as it needs to be computed off of the given instruction, and complicated
+  // expressions in the initializer list seemed very ugly.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.SetSize(count);
@@ -286,7 +334,7 @@
                                          size_t* number_of_dex_instructions,
                                          size_t* number_of_blocks,
                                          size_t* number_of_branches) {
-  // TODO: Support switch instructions.
+  // TODO: Support sparse-switch instructions.
   branch_targets_.SetSize(code_end - code_ptr);
 
   // Create the first block for the dex instructions, single successor of the entry block.
@@ -296,7 +344,7 @@
 
   // Iterate over all instructions and find branching instructions. Create blocks for
   // the locations these instructions branch to.
-  size_t dex_pc = 0;
+  uint32_t dex_pc = 0;
   while (code_ptr < code_end) {
     (*number_of_dex_instructions)++;
     const Instruction& instruction = *Instruction::At(code_ptr);
@@ -316,6 +364,37 @@
         branch_targets_.Put(dex_pc, block);
         (*number_of_blocks)++;
       }
+    } else if (instruction.Opcode() == Instruction::PACKED_SWITCH) {
+      SwitchTable table(instruction, dex_pc, false);
+
+      uint16_t num_entries = table.GetNumEntries();
+
+      // Entry 0 holds the starting key; the branch targets are entries 1..num_entries. Use a
+      // loop counter wider than uint16_t so that `i <= num_entries` cannot overflow.
+      for (size_t i = 1; i <= num_entries; ++i) {
+        // The target of the case.
+        uint32_t target = dex_pc + table.GetEntryAt(i);
+        if (FindBlockStartingAt(target) == nullptr) {
+          block = new (arena_) HBasicBlock(graph_, target);
+          branch_targets_.Put(target, block);
+          (*number_of_blocks)++;
+        }
+
+        // The next case gets its own block.
+        if (i < num_entries) {
+          block = new (arena_) HBasicBlock(graph_, target);
+          branch_targets_.Put(table.GetDexPcForIndex(i), block);
+          (*number_of_blocks)++;
+        }
+      }
+
+      // Fall-through. Add a block if there is more code afterwards.
+      dex_pc += instruction.SizeInCodeUnits();
+      code_ptr += instruction.SizeInCodeUnits();
+      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+        block = new (arena_) HBasicBlock(graph_, dex_pc);
+        branch_targets_.Put(dex_pc, block);
+        (*number_of_blocks)++;
+      }
     } else {
       code_ptr += instruction.SizeInCodeUnits();
       dex_pc += instruction.SizeInCodeUnits();
@@ -337,9 +416,10 @@
 
 void HGraphBuilder::Conversion_12x(const Instruction& instruction,
                                    Primitive::Type input_type,
-                                   Primitive::Type result_type) {
+                                   Primitive::Type result_type,
+                                   uint32_t dex_pc) {
   HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first));
+  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
@@ -863,6 +943,63 @@
   return true;
 }
 
+bool HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
+  SwitchTable table(instruction, dex_pc, false);
+
+  // Value to test against.
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+
+  // Chained cmp-and-branch, starting from starting_key.
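+  // Each case emits `if (value == case_key) goto case_target` and falls through to the
+  // comparison for the next case; the final miss branches to the default (fall-through) block.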
+  int32_t starting_key = table.GetEntryAt(0);
+
+  uint16_t num_entries = table.GetNumEntries();
+  // On overflow condition (or zero cases) just punt.
+  if (num_entries == 0 || num_entries == UINT16_MAX) {
+    return false;
+  }
+
+  for (size_t i = 1; i <= num_entries; i++) {
+    int32_t target_offset = table.GetEntryAt(i);
+    PotentiallyAddSuspendCheck(target_offset, dex_pc);
+
+    // The current case's value.
+    HInstruction* this_case_value = GetIntConstant(starting_key + i - 1);
+
+    // Compare value and this_case_value.
+    HEqual* comparison = new (arena_) HEqual(value, this_case_value);
+    current_block_->AddInstruction(comparison);
+    HInstruction* ifinst = new (arena_) HIf(comparison);
+    current_block_->AddInstruction(ifinst);
+
+    // Case hit: use the target offset to determine where to go.
+    HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
+    DCHECK(case_target != nullptr);
+    current_block_->AddSuccessor(case_target);
+
+    // Case miss: go to the next case (or default fall-through).
+    // When there is a next case, we use the block stored with the table offset representing this
+    // case (that is where we registered them in ComputeBranchTargets).
+    // When there is no next case, we use the following instruction.
+    // TODO: Peel the last iteration to avoid conditional.
+    if (i < table.GetNumEntries()) {
+      HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(i));
+      DCHECK(next_case_target != nullptr);
+      current_block_->AddSuccessor(next_case_target);
+
+      // Need to manually add the block, as there is no dex-pc transition for the cases.
+      graph_->AddBlock(next_case_target);
+
+      current_block_ = next_case_target;
+    } else {
+      HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
+      DCHECK(default_target != nullptr);
+      current_block_->AddSuccessor(default_target);
+      current_block_ = nullptr;
+    }
+  }
+  return true;
+}
+
 void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) {
   if (target_offset <= 0) {
     // Unconditionally add a suspend check to backward branches. We can remove
@@ -1079,52 +1216,67 @@
     }
 
     case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
     case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt);
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
       break;
     }
 
@@ -1760,6 +1912,13 @@
       break;
     }
 
+    case Instruction::PACKED_SWITCH: {
+      if (!BuildPackedSwitch(instruction, dex_pc)) {
+        return false;
+      }
+      break;
+    }
+
     default:
       return false;
   }
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 8519bcb..e4e3705 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -135,7 +135,8 @@
 
   void Conversion_12x(const Instruction& instruction,
                       Primitive::Type input_type,
-                      Primitive::Type result_type);
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
 
   void BuildCheckedDivRem(uint16_t out_reg,
                           uint16_t first_reg,
@@ -202,6 +203,10 @@
                       uint16_t type_index,
                       uint32_t dex_pc);
 
+  // Builds an instruction sequence for a packed switch statement. Punts (returns false) for a
+  // switch with zero cases or with the full 64K set of cases.
+  bool BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e581af2..7f358ea 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -499,6 +499,21 @@
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
+  if (instruction != nullptr && instruction->IsTypeConversion()) {
+    // The code generated for some type conversions may call the
+    // runtime, thus normally requiring a subsequent call to this
+    // method.  However, the method verifier does not produce PC
+    // information for Dex type conversion instructions, as it
+    // considers them as "atomic" (they cannot join a GC).
+    // Therefore we do not currently record PC information for such
+    // instructions.  As this may change later, we added this special
+    // case so that code generators may nevertheless call
+    // CodeGenerator::RecordPcInfo without triggering an error in
+    // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
+    // thereafter.
+    return;
+  }
+
   // Collect PC infos for the mapping table.
   struct PcInfo pc_info;
   pc_info.dex_pc = dex_pc;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7c8f6a2..1d42c47 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -37,6 +37,8 @@
 
 // Maximum value for a primitive integer.
 static int32_t constexpr kPrimIntMax = 0x7fffffff;
+// Maximum value for a primitive long.
+static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;
 
 class Assembler;
 class CodeGenerator;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 448a5a0..5076c85 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -44,8 +44,9 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
  public:
@@ -874,6 +875,7 @@
       || instruction->IsBoundsCheck()
       || instruction->IsNullCheck()
       || instruction->IsDivZeroCheck()
+      || instruction->GetLocations()->CanCall()
       || !IsLeafMethod());
 }
 
@@ -1359,11 +1361,18 @@
 }
 
 void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // Float-to-long conversions invoke the runtime.
+  LocationSummary::CallKind call_kind =
+      (input_type == Primitive::kPrimFloat && result_type == Primitive::kPrimLong)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1434,7 +1443,15 @@
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          InvokeRuntimeCallingConvention calling_convention;
+          locations->SetInAt(0, Location::FpuRegisterLocation(
+              calling_convention.GetFpuRegisterAt(0)));
+          locations->SetOut(Location::RegisterPairLocation(R0, R1));
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1484,8 +1501,9 @@
           break;
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1515,8 +1533,9 @@
           break;
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1623,6 +1642,13 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          // This call does not actually record PC information.
+          codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
+                                  conversion,
+                                  conversion->GetDexPc());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1704,8 +1730,9 @@
         }
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          __ vcvtsd(out.AsFpuRegister<SRegister>(),
+                    FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
           break;
 
         default:
@@ -1760,8 +1787,9 @@
         }
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          __ vcvtds(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+                    in.AsFpuRegister<SRegister>());
           break;
 
         default:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6f83d9f..2aa121d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1326,11 +1326,18 @@
 }
 
 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // Float-to-long conversions invoke the runtime.
+  LocationSummary::CallKind call_kind =
+      (input_type == Primitive::kPrimFloat && result_type == Primitive::kPrimLong)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1401,7 +1408,15 @@
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          InvokeRuntimeCallingConvention calling_convention;
+          locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+          // The runtime helper puts the result in EAX, EDX.
+          locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1449,8 +1464,9 @@
           break;
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1479,8 +1495,9 @@
           break;
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1615,6 +1632,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pF2l)));
+          // This call does not actually record PC information.
+          codegen_->RecordPcInfo(conversion, conversion->GetDexPc());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1694,8 +1717,8 @@
         }
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
         default:
@@ -1741,8 +1764,8 @@
         }
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
         default:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 47fd304..5761fb1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1394,6 +1394,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1439,8 +1445,9 @@
           break;
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1467,8 +1474,9 @@
           break;
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
 
         default:
@@ -1565,14 +1573,14 @@
 
           __ movl(output, Immediate(kPrimIntMax));
           // temp = int-to-float(output)
-          __ cvtsi2ss(temp, output);
+          __ cvtsi2ss(temp, output, false);
           // if input >= temp goto done
           __ comiss(input, temp);
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
           // output = float-to-int-truncate(input)
-          __ cvttss2si(output, input);
+          __ cvttss2si(output, input, false);
           __ jmp(&done);
           __ Bind(&nan);
           //  output = 0
@@ -1604,7 +1612,31 @@
           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          XmmRegister input = in.AsFpuRegister<XmmRegister>();
+          CpuRegister output = out.AsRegister<CpuRegister>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          Label done, nan;
+
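+          // This matches Java float-to-long semantics: values >= 2^63 saturate to
+          // kPrimLongMax, NaN becomes 0, and cvttss2si already yields 0x8000000000000000
+          // (Long.MIN_VALUE) on negative overflow.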
+          __ movq(output, Immediate(kPrimLongMax));
+          // temp = int-to-float(output)
+          __ cvtsi2ss(temp, output, true);
+          // if input >= temp goto done
+          __ comiss(input, temp);
+          __ j(kAboveEqual, &done);
+          // if input == NaN goto nan
+          __ j(kUnordered, &nan);
+          // output = float-to-int-truncate(input)
+          __ cvttss2si(output, input, true);
+          __ jmp(&done);
+          __ Bind(&nan);
+          //  output = 0
+          __ xorq(output, output);
+          __ Bind(&done);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1656,8 +1688,8 @@
           break;
 
         case Primitive::kPrimDouble:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `double-to-float' instruction.
+          __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
         default:
@@ -1682,8 +1714,8 @@
           break;
 
         case Primitive::kPrimFloat:
-          LOG(FATAL) << "Type conversion from " << input_type
-                     << " to " << result_type << " not yet implemented";
+          // Processing a Dex `float-to-double' instruction.
+          __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
         default:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3908a61..8a25de1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2001,8 +2001,8 @@
 class HTypeConversion : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
-  HTypeConversion(Primitive::Type result_type, HInstruction* input)
-      : HExpression(result_type, SideEffects::None()) {
+  HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
+      : HExpression(result_type, SideEffects::None()), dex_pc_(dex_pc) {
     SetRawInputAt(0, input);
     DCHECK_NE(input->GetType(), result_type);
   }
@@ -2011,12 +2011,18 @@
   Primitive::Type GetInputType() const { return GetInput()->GetType(); }
   Primitive::Type GetResultType() const { return GetType(); }
 
+  // Required by the x86 and ARM code generators when producing calls
+  // to the runtime.
+  uint32_t GetDexPc() const { return dex_pc_; }
+
   bool CanBeMoved() const OVERRIDE { return true; }
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
 
   DECLARE_INSTRUCTION(TypeConversion);
 
  private:
+  const uint32_t dex_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(HTypeConversion);
 };
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 474d8a9..3c21236 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -663,9 +663,19 @@
 
 
 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
+  cvttss2si(dst, src, false);
+}
+
+
+void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
-  EmitOptionalRex32(dst, src);
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
   EmitUint8(0x0F);
   EmitUint8(0x2C);
   EmitXmmRegisterOperand(dst.LowBits(), src);
@@ -1997,6 +2007,10 @@
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
+  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
   uint8_t rex = 0x48 | operand.rex();  // REX.W000
   if (dst.NeedsRex()) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6e71e4a..4c28366 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -340,6 +340,7 @@
   void cvtsd2ss(XmmRegister dst, XmmRegister src);
 
   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
+  void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
 
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
@@ -688,6 +689,7 @@
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
+  void EmitRex64(CpuRegister dst, XmmRegister src);
 
   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index a1ac2f0..d7669e1 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -25,10 +25,6 @@
 #include <string>
 #include <vector>
 
-#ifndef __APPLE__
-#include <malloc.h>  // For mallinfo
-#endif
-
 #if defined(__linux__) && defined(__arm__)
 #include <sys/personality.h>
 #include <sys/utsname.h>
@@ -987,6 +983,12 @@
     return true;
   }
 
+  void EraseOatFile() {
+    DCHECK(oat_file_.get() != nullptr);
+    oat_file_->Erase();
+    oat_file_.reset();
+  }
+
   // Set up the environment for compilation. Includes starting the runtime and loading/opening the
   // boot class path.
   bool Setup() {
@@ -1301,7 +1303,6 @@
       if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
                              oat_file_.get())) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
-        oat_file_->Erase();
         return false;
       }
     }
@@ -1609,20 +1610,9 @@
   }
 
   void LogCompletionTime() {
-    std::ostringstream mallinfostr;
-#ifdef HAVE_MALLOC_H
-    struct mallinfo info = mallinfo();
-    const size_t allocated_space = static_cast<size_t>(info.uordblks);
-    const size_t free_space = static_cast<size_t>(info.fordblks);
-    mallinfostr << " native alloc=" << PrettySize(allocated_space) << " free="
-        << PrettySize(free_space);
-#endif
-    const ArenaPool* arena_pool = driver_->GetArenaPool();
-    gc::Heap* heap = Runtime::Current()->GetHeap();
     LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
-              << " (threads: " << thread_count_ << ")"
-              << " arena alloc=" << PrettySize(arena_pool->GetBytesAllocated())
-              << " java alloc=" << PrettySize(heap->GetBytesAllocated()) << mallinfostr.str();
+              << " (threads: " << thread_count_ << ") "
+              << driver_->GetMemoryUsageString();
   }
 
   std::unique_ptr<CompilerOptions> compiler_options_;
@@ -1712,6 +1702,7 @@
 
   // Create the boot.oat.
   if (!dex2oat.CreateOatFile()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
@@ -1756,6 +1747,7 @@
 
   // Create the app oat.
   if (!dex2oat.CreateOatFile()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
@@ -1813,6 +1805,7 @@
   LOG(INFO) << CommandLine();
 
   if (!dex2oat.Setup()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 6aab632..ee13e03 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -5963,4 +5963,34 @@
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
+bool ClassLinker::MayBeCalledWithDirectCodePointer(mirror::ArtMethod* m) {
+  // Non-image methods don't use direct code pointer.
+  if (!m->GetDeclaringClass()->IsBootStrapClassLoaded()) {
+    return false;
+  }
+  if (m->IsPrivate()) {
+    // The method can only be called inside its own oat file. Therefore it won't be called using
+    // its direct code if the oat file has been compiled in PIC mode.
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    const DexFile& dex_file = m->GetDeclaringClass()->GetDexFile();
+    const OatFile::OatDexFile* oat_dex_file = class_linker->FindOpenedOatDexFileForDexFile(dex_file);
+    if (oat_dex_file == nullptr) {
+      // No oat file: the method has not been compiled.
+      return false;
+    }
+    const OatFile* oat_file = oat_dex_file->GetOatFile();
+    return oat_file != nullptr && !oat_file->IsPic();
+  } else {
+    // The method can be called from outside its own oat file, so it is only guaranteed not to be
+    // called through its direct code pointer if all loaded oat files have been compiled in PIC mode.
+    ReaderMutexLock mu(Thread::Current(), dex_lock_);
+    for (const OatFile* oat_file : oat_files_) {
+      if (!oat_file->IsPic()) {
+        return true;
+      }
+    }
+    return false;
+  }
+}
+
 }  // namespace art
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index b78d0b5..55332f8 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -471,6 +471,10 @@
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Returns true if the method can be called with its direct code pointer, false otherwise.
+  bool MayBeCalledWithDirectCodePointer(mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   const OatFile::OatMethod FindOatMethodFor(mirror::ArtMethod* method, bool* found)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index ac078aa..4f09460 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -1137,4 +1137,24 @@
   CheckPreverified(statics.Get(), true);
 }
 
+TEST_F(ClassLinkerTest, IsBootStrapClassLoaded) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Statics"))));
+
+  // java.lang.Object is a bootstrap class.
+  Handle<mirror::Class> jlo_class(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(jlo_class.Get() != nullptr);
+  EXPECT_TRUE(jlo_class.Get()->IsBootStrapClassLoaded());
+
+  // Statics is not a bootstrap class.
+  Handle<mirror::Class> statics(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStatics;", class_loader)));
+  ASSERT_TRUE(statics.Get() != nullptr);
+  EXPECT_FALSE(statics.Get()->IsBootStrapClassLoaded());
+}
+
 }  // namespace art
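
Note: the IsBootStrapClassLoaded() property exercised by this test is also observable from plain Java: classes defined by the bootstrap class loader report a null ClassLoader. A minimal, hypothetical illustration (not part of this change or the ART test suite), assuming a standard JVM/ART runtime:

// BootstrapLoaderDemo.java -- hypothetical illustration only.
public class BootstrapLoaderDemo {
  public static void main(String[] args) {
    // java.lang.Object is defined by the bootstrap class loader, so its loader is null.
    System.out.println(Object.class.getClassLoader() == null);               // true
    // An application class is defined by the system/app class loader, so its loader is non-null.
    System.out.println(BootstrapLoaderDemo.class.getClassLoader() == null);  // false
  }
}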
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index d5cba50..a9b70cb 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3264,8 +3264,16 @@
       ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
       const bool is_compiled = class_linker->GetOatMethodQuickCodeFor(m) != nullptr;
       if (is_compiled) {
-        VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
-        return DeoptimizationRequest::kSelectiveDeoptimization;
+        // If the method may be called through its direct code pointer (without loading
+        // its updated entrypoint), we need full deoptimization to not miss the breakpoint.
+        if (class_linker->MayBeCalledWithDirectCodePointer(m)) {
+          VLOG(jdwp) << "Need full deoptimization because of possible direct code call "
+                     << "into image for compiled method " << PrettyMethod(m);
+          return DeoptimizationRequest::kFullDeoptimization;
+        } else {
+          VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
+          return DeoptimizationRequest::kSelectiveDeoptimization;
+        }
       } else {
         // Method is not compiled: we don't need to deoptimize.
         VLOG(jdwp) << "No need for deoptimization for non-compiled method " << PrettyMethod(m);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 812cfd3..bd49754 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1048,6 +1048,11 @@
     DISALLOW_COPY_AND_ASSIGN(InitializeClassVisitor);
   };
 
+  // Returns true if the class loader is null, i.e. the class loader is the bootstrap class loader.
+  bool IsBootStrapClassLoaded() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetClassLoader() == nullptr;
+  }
+
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index e7dbe24..91618fc 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -62,6 +62,18 @@
     }
   }
 
+  public static void assertFloatIsNaN(float result) {
+    if (!Float.isNaN(result)) {
+      throw new Error("Expected: NaN, found: " + result);
+    }
+  }
+
+  public static void assertDoubleIsNaN(double result) {
+    if (!Double.isNaN(result)) {
+      throw new Error("Expected: NaN, found: " + result);
+    }
+  }
+
 
   public static void main(String[] args) {
     // Generate, compile and check int-to-long Dex instructions.
@@ -94,6 +106,15 @@
     // Generate, compile and check float-to-int Dex instructions.
     floatToInt();
 
+    // Generate, compile and check float-to-long Dex instructions.
+    floatToLong();
+
+    // Generate, compile and check float-to-double Dex instructions.
+    floatToDouble();
+
+    // Generate, compile and check double-to-float Dex instructions.
+    doubleToFloat();
+
     // Generate, compile and check int-to-byte Dex instructions.
     shortToByte();
     intToByte();
@@ -342,6 +363,84 @@
     assertIntEquals(-2147483648, $opt$FloatToInt(Float.NEGATIVE_INFINITY));
   }
 
+  private static void floatToLong() {
+    assertLongEquals(1L, $opt$FloatToLong(1F));
+    assertLongEquals(0L, $opt$FloatToLong(0F));
+    assertLongEquals(0L, $opt$FloatToLong(-0F));
+    assertLongEquals(-1L, $opt$FloatToLong(-1F));
+    assertLongEquals(51L, $opt$FloatToLong(51F));
+    assertLongEquals(-51L, $opt$FloatToLong(-51F));
+    assertLongEquals(0L, $opt$FloatToLong(0.5F));
+    assertLongEquals(0L, $opt$FloatToLong(0.4999999F));
+    assertLongEquals(0L, $opt$FloatToLong(-0.4999999F));
+    assertLongEquals(0L, $opt$FloatToLong(-0.5F));
+    assertLongEquals(42L, $opt$FloatToLong(42.199F));
+    assertLongEquals(-42L, $opt$FloatToLong(-42.199F));
+    assertLongEquals(2147483648L, $opt$FloatToLong(2147483647F));  // 2^31 - 1
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483647F));  // -(2^31 - 1)
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483648F));  // -(2^31)
+    assertLongEquals(2147483648L, $opt$FloatToLong(2147483648F));  // (2^31)
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483649F));  // -(2^31 + 1)
+    assertLongEquals(9223372036854775807L, $opt$FloatToLong(9223372036854775807F));  // 2^63 - 1
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(-9223372036854775807F));  // -(2^63 - 1)
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(-9223372036854775808F));  // -(2^63)
+    assertLongEquals(0L, $opt$FloatToLong(Float.NaN));
+    assertLongEquals(9223372036854775807L, $opt$FloatToLong(Float.POSITIVE_INFINITY));
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(Float.NEGATIVE_INFINITY));
+  }
+
+  private static void floatToDouble() {
+    assertDoubleEquals(1D, $opt$FloatToDouble(1F));
+    assertDoubleEquals(0D, $opt$FloatToDouble(0F));
+    assertDoubleEquals(0D, $opt$FloatToDouble(-0F));
+    assertDoubleEquals(-1D, $opt$FloatToDouble(-1F));
+    assertDoubleEquals(51D, $opt$FloatToDouble(51F));
+    assertDoubleEquals(-51D, $opt$FloatToDouble(-51F));
+    assertDoubleEquals(0.5D, $opt$FloatToDouble(0.5F));
+    assertDoubleEquals(0.49999991059303284D, $opt$FloatToDouble(0.4999999F));
+    assertDoubleEquals(-0.49999991059303284D, $opt$FloatToDouble(-0.4999999F));
+    assertDoubleEquals(-0.5D, $opt$FloatToDouble(-0.5F));
+    assertDoubleEquals(42.19900131225586D, $opt$FloatToDouble(42.199F));
+    assertDoubleEquals(-42.19900131225586D, $opt$FloatToDouble(-42.199F));
+    assertDoubleEquals(2147483648D, $opt$FloatToDouble(2147483647F));  // 2^31 - 1
+    assertDoubleEquals(-2147483648D, $opt$FloatToDouble(-2147483647F));  // -(2^31 - 1)
+    assertDoubleEquals(-2147483648D, $opt$FloatToDouble(-2147483648F));  // -(2^31)
+    assertDoubleEquals(2147483648D, $opt$FloatToDouble(2147483648F));  // (2^31)
+    assertDoubleEquals(-2147483648D, $opt$FloatToDouble(-2147483649F));  // -(2^31 + 1)
+    assertDoubleEquals(9223372036854775807D, $opt$FloatToDouble(9223372036854775807F));  // 2^63 - 1
+    assertDoubleEquals(-9223372036854775807D, $opt$FloatToDouble(-9223372036854775807F));  // -(2^63 - 1)
+    assertDoubleEquals(-9223372036854775808D, $opt$FloatToDouble(-9223372036854775808F));  // -(2^63)
+    assertDoubleIsNaN($opt$FloatToDouble(Float.NaN));
+    assertDoubleEquals(Double.POSITIVE_INFINITY, $opt$FloatToDouble(Float.POSITIVE_INFINITY));
+    assertDoubleEquals(Double.NEGATIVE_INFINITY, $opt$FloatToDouble(Float.NEGATIVE_INFINITY));
+  }
+
+  private static void doubleToFloat() {
+    assertFloatEquals(1F, $opt$DoubleToFloat(1D));
+    assertFloatEquals(0F, $opt$DoubleToFloat(0D));
+    assertFloatEquals(0F, $opt$DoubleToFloat(-0D));
+    assertFloatEquals(-1F, $opt$DoubleToFloat(-1D));
+    assertFloatEquals(51F, $opt$DoubleToFloat(51D));
+    assertFloatEquals(-51F, $opt$DoubleToFloat(-51D));
+    assertFloatEquals(0.5F, $opt$DoubleToFloat(0.5D));
+    assertFloatEquals(0.4999999F, $opt$DoubleToFloat(0.4999999D));
+    assertFloatEquals(-0.4999999F, $opt$DoubleToFloat(-0.4999999D));
+    assertFloatEquals(-0.5F, $opt$DoubleToFloat(-0.5D));
+    assertFloatEquals(42.199F, $opt$DoubleToFloat(42.199D));
+    assertFloatEquals(-42.199F, $opt$DoubleToFloat(-42.199D));
+    assertFloatEquals(2147483648F, $opt$DoubleToFloat(2147483647D));  // 2^31 - 1
+    assertFloatEquals(-2147483648F, $opt$DoubleToFloat(-2147483647D));  // -(2^31 - 1)
+    assertFloatEquals(-2147483648F, $opt$DoubleToFloat(-2147483648D));  // -(2^31)
+    assertFloatEquals(2147483648F, $opt$DoubleToFloat(2147483648D));  // (2^31)
+    assertFloatEquals(-2147483648F, $opt$DoubleToFloat(-2147483649D));  // -(2^31 + 1)
+    assertFloatEquals(9223372036854775807F, $opt$DoubleToFloat(9223372036854775807D));  // 2^63 - 1
+    assertFloatEquals(-9223372036854775807F, $opt$DoubleToFloat(-9223372036854775807D));  // -(2^63 - 1)
+    assertFloatEquals(-9223372036854775808F, $opt$DoubleToFloat(-9223372036854775808D));  // -(2^63)
+    assertFloatIsNaN($opt$DoubleToFloat(Double.NaN));
+    assertFloatEquals(Float.POSITIVE_INFINITY, $opt$DoubleToFloat(Double.POSITIVE_INFINITY));
+    assertFloatEquals(Float.NEGATIVE_INFINITY, $opt$DoubleToFloat(Double.NEGATIVE_INFINITY));
+  }
+
   private static void shortToByte() {
     assertByteEquals((byte)1, $opt$ShortToByte((short)1));
     assertByteEquals((byte)0, $opt$ShortToByte((short)0));
@@ -470,48 +569,57 @@
 
 
   // These methods produce int-to-long Dex instructions.
-  static long $opt$ByteToLong(byte a) { return a; }
-  static long $opt$ShortToLong(short a) { return a; }
-  static long $opt$IntToLong(int a) { return a; }
-  static long $opt$CharToLong(int a) { return a; }
+  static long $opt$ByteToLong(byte a) { return (long)a; }
+  static long $opt$ShortToLong(short a) { return (long)a; }
+  static long $opt$IntToLong(int a) { return (long)a; }
+  static long $opt$CharToLong(int a) { return (long)a; }
 
   // These methods produce int-to-float Dex instructions.
-  static float $opt$ByteToFloat(byte a) { return a; }
-  static float $opt$ShortToFloat(short a) { return a; }
-  static float $opt$IntToFloat(int a) { return a; }
-  static float $opt$CharToFloat(char a) { return a; }
+  static float $opt$ByteToFloat(byte a) { return (float)a; }
+  static float $opt$ShortToFloat(short a) { return (float)a; }
+  static float $opt$IntToFloat(int a) { return (float)a; }
+  static float $opt$CharToFloat(char a) { return (float)a; }
 
   // These methods produce int-to-double Dex instructions.
-  static double $opt$ByteToDouble(byte a) { return a; }
-  static double $opt$ShortToDouble(short a) { return a; }
-  static double $opt$IntToDouble(int a) { return a; }
-  static double $opt$CharToDouble(int a) { return a; }
+  static double $opt$ByteToDouble(byte a) { return (double)a; }
+  static double $opt$ShortToDouble(short a) { return (double)a; }
+  static double $opt$IntToDouble(int a) { return (double)a; }
+  static double $opt$CharToDouble(int a) { return (double)a; }
 
   // These methods produce long-to-int Dex instructions.
-  static int $opt$LongToInt(long a){ return (int)a; }
-  static int $opt$LongLiteralToInt(){ return (int)42L; }
+  static int $opt$LongToInt(long a) { return (int)a; }
+  static int $opt$LongLiteralToInt() { return (int)42L; }
 
   // This method produces a long-to-float Dex instruction.
-  static float $opt$LongToFloat(long a){ return (float)a; }
+  static float $opt$LongToFloat(long a) { return (float)a; }
 
   // This method produces a long-to-double Dex instruction.
-  static double $opt$LongToDouble(long a){ return (double)a; }
+  static double $opt$LongToDouble(long a) { return (double)a; }
 
   // This method produces a float-to-int Dex instruction.
-  static int $opt$FloatToInt(float a){ return (int)a; }
+  static int $opt$FloatToInt(float a) { return (int)a; }
+
+  // This method produces a float-to-double Dex instruction.
+  static double $opt$FloatToDouble(float a) { return (double)a; }
+
+  // This method produces a double-to-float Dex instruction.
+  static float $opt$DoubleToFloat(double a) { return (float)a; }
+
+  // This method produces a float-to-long Dex instruction.
+  static long $opt$FloatToLong(float a) { return (long)a; }
 
   // These methods produce int-to-byte Dex instructions.
-  static byte $opt$ShortToByte(short a){ return (byte)a; }
-  static byte $opt$IntToByte(int a){ return (byte)a; }
-  static byte $opt$CharToByte(char a){ return (byte)a; }
+  static byte $opt$ShortToByte(short a) { return (byte)a; }
+  static byte $opt$IntToByte(int a) { return (byte)a; }
+  static byte $opt$CharToByte(char a) { return (byte)a; }
 
   // These methods produce int-to-short Dex instructions.
-  static short $opt$ByteToShort(byte a){ return (short)a; }
-  static short $opt$IntToShort(int a){ return (short)a; }
-  static short $opt$CharToShort(char a){ return (short)a; }
+  static short $opt$ByteToShort(byte a) { return (short)a; }
+  static short $opt$IntToShort(int a) { return (short)a; }
+  static short $opt$CharToShort(char a) { return (short)a; }
 
   // These methods produce int-to-char Dex instructions.
-  static char $opt$ByteToChar(byte a){ return (char)a; }
-  static char $opt$ShortToChar(short a){ return (char)a; }
-  static char $opt$IntToChar(int a){ return (char)a; }
+  static char $opt$ByteToChar(byte a) { return (char)a; }
+  static char $opt$ShortToChar(short a) { return (char)a; }
+  static char $opt$IntToChar(int a) { return (char)a; }
 }
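
Note: the expected values in the new floatToLong()/floatToDouble()/doubleToFloat() checks follow the Java primitive conversion rules (JLS 5.1.2/5.1.3): float-to-double widening is exact, while float-to-long narrowing rounds toward zero, converts NaN to 0, and saturates out-of-range values (including the infinities) to Long.MIN_VALUE/Long.MAX_VALUE. A minimal, hypothetical sketch of those rules, independent of the $opt$ test harness:

// NarrowingDemo.java -- hypothetical illustration only.
public class NarrowingDemo {
  public static void main(String[] args) {
    System.out.println((long) 42.199F);                  // 42, rounds toward zero
    System.out.println((long) Float.NaN);                // 0, NaN converts to zero
    System.out.println((long) Float.POSITIVE_INFINITY);  // 9223372036854775807 (Long.MAX_VALUE)
    System.out.println((long) Float.NEGATIVE_INFINITY);  // -9223372036854775808 (Long.MIN_VALUE)
    // 2147483647 is not exactly representable as a float; the literal 2147483647F rounds up to
    // 2^31, which is why $opt$FloatToLong(2147483647F) above is expected to return 2147483648L.
    System.out.println((long) 2147483647F);              // 2147483648
    // Float-to-double widening is exact: the double is the float's exact numeric value.
    System.out.println((double) 0.4999999F);             // 0.49999991059303284
  }
}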
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index b85685b..69ba288 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -342,6 +342,14 @@
       $(IMAGE_TYPES),$(PICTEST_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
+# If ART_USE_OPTIMIZING_COMPILER is set to true, then the default core.art has been
+# compiled with the optimizing compiler.
+ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=