Merge "Disable Homogeneous space for dex2oat"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index af64470..e3f0c24 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -39,6 +39,7 @@
   NonStaticLeafMethods \
   ProtoCompare \
   ProtoCompare2 \
+  ProfileTestMultiDex \
   StaticLeafMethods \
   Statics \
   StaticsFromCode \
@@ -65,7 +66,7 @@
 
 # Dex file dependencies for each gtest.
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
@@ -78,6 +79,8 @@
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_profile_assistant_test_DEX_DEPS := ProfileTestMultiDex
+ART_GTEST_profile_compilation_info_test_DEX_DEPS := ProfileTestMultiDex
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
@@ -191,13 +194,12 @@
   runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
-  runtime/gc/space/dlmalloc_space_base_test.cc \
   runtime/gc/space/dlmalloc_space_static_test.cc \
   runtime/gc/space/dlmalloc_space_random_test.cc \
-  runtime/gc/space/rosalloc_space_base_test.cc \
+  runtime/gc/space/large_object_space_test.cc \
   runtime/gc/space/rosalloc_space_static_test.cc \
   runtime/gc/space/rosalloc_space_random_test.cc \
-  runtime/gc/space/large_object_space_test.cc \
+  runtime/gc/space/space_create_test.cc \
   runtime/gc/task_processor_test.cc \
   runtime/gtest_test.cc \
   runtime/handle_scope_test.cc \
@@ -208,6 +210,7 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/profile_compilation_info_test.cc \
   runtime/lambda/closure_test.cc \
   runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
@@ -267,6 +270,7 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
+  compiler/profile_assistant_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4589736..6eeef3f 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -143,7 +143,9 @@
 	jni/quick/arm64/calling_convention_arm64.cc \
 	linker/arm64/relative_patcher_arm64.cc \
 	optimizing/code_generator_arm64.cc \
+	optimizing/instruction_simplifier_arm.cc \
 	optimizing/instruction_simplifier_arm64.cc \
+	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index b5fd1e0..afc8463 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@
   return nullptr;
 }
 
+// Returns the profile handed to the CompilerDriver constructor; test fixtures may override it.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null, profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -204,12 +210,10 @@
                                             2,
                                             true,
                                             true,
-                                            "",
-                                            false,
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
-                                            /* profile_compilation_info */ nullptr));
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946..7e0fbab 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -75,6 +76,8 @@
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d6..c250bd9 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@
     } else {
       // The method index is actually the dex PC in this case.
       // Calculate the proper dex file and target method idx.
+
+      // We must be in JIT mode if we get here.
       CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
+
+      // The invoke type better be virtual, except for the string init special case above.
+      CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d7518..3766093 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@
     true,   // kIntrinsicFloatCvt
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
@@ -99,6 +100,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -110,9 +112,9 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +155,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
@@ -293,6 +295,7 @@
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
+    "bitCount",              // kNameCacheBitCount
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",           // kNameCacheRotateRight
@@ -447,6 +450,8 @@
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@
                                           intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicBitCount:
     case kIntrinsicNumberOfLeadingZeros:
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577..2803623 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 12568a4..c5df134 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -69,6 +69,8 @@
       false,
       nullptr,
       nullptr,
+      false,
+      "",
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
@@ -88,8 +90,6 @@
                           0,
                           false,
                           false,
-                          "",
-                          false,
                           0,
                           -1,
                           nullptr,
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index b39fe4d..d63878d 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -52,6 +52,8 @@
         false,
         nullptr,
         nullptr,
+        false,
+        "",
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
@@ -69,8 +71,6 @@
         0,
         false,
         false,
-        "",
-        false,
         0,
         -1,
         nullptr,
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index f18fa67..2e2d1f9 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -41,8 +41,6 @@
                         1u,
                         false,
                         false,
-                        "",
-                        false,
                         nullptr,
                         -1,
                         nullptr,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 043bd93..d021525 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -345,7 +345,6 @@
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
     size_t thread_count, bool dump_stats, bool dump_passes,
-    const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
     const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
     const ProfileCompilationInfo* profile_compilation_info)
@@ -370,8 +369,6 @@
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
-      dump_cfg_file_name_(dump_cfg_file_name),
-      dump_cfg_append_(dump_cfg_append),
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
@@ -1197,15 +1194,18 @@
   if (equals_referrers_class != nullptr) {
     *equals_referrers_class = (method_id.class_idx_ == type_idx);
   }
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access check, will return true if access is ok or false if we're going to have to
-  // check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class);
-  if (result) {
+  if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
     if (type_known_final != nullptr) {
       *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
@@ -1216,7 +1216,7 @@
   } else {
     stats_->TypeNeedsAccessCheck();
   }
-  return result;
+  return is_accessible;
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
@@ -1236,14 +1236,18 @@
   }
   *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access and instantiable checks, will return true if access is ok or false if we're
-  // going to have to check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+  bool result = is_accessible && resolved_class->IsInstantiable();
   if (result) {
     stats_->TypeDoesntNeedAccessCheck();
   } else {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 3847c81..6a2f7bf 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -95,7 +95,6 @@
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
-                 const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
                  const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
                  const ProfileCompilationInfo* profile_compilation_info);
@@ -122,8 +121,10 @@
       return true;
     }
     auto it1 = dex_file_oat_filename_map_->find(d1);
+    DCHECK(it1 != dex_file_oat_filename_map_->end());
     auto it2 = dex_file_oat_filename_map_->find(d2);
-    return it1 == it2;
+    DCHECK(it2 != dex_file_oat_filename_map_->end());
+    return it1->second == it2->second;
   }
 
   void CompileAll(jobject class_loader,
@@ -421,14 +422,6 @@
     return dump_passes_;
   }
 
-  const std::string& GetDumpCfgFileName() const {
-    return dump_cfg_file_name_;
-  }
-
-  bool GetDumpCfgAppend() const {
-    return dump_cfg_append_;
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -666,8 +659,6 @@
 
   bool dump_stats_;
   const bool dump_passes_;
-  const std::string dump_cfg_file_name_;
-  const bool dump_cfg_append_;
 
   CumulativeLogger* const timings_logger_;
 
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86..4c03e5d 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -240,6 +241,94 @@
   EXPECT_TRUE(expected->empty());
 }
 
+// Test fixture supplying a profile built from the ProfileTestMultiDex dex files.
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    // Add entries 1 and 2 per dex file (presumably method indices; TODO confirm).
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;  // Member-backed, so the pointer stays valid after this returns.
+  }
+
+  // Pretty-printed methods expected to be compiled for "Main" / "Second"; empty set otherwise.
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()", "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()", "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  // Expects only the virtual methods in `expected_methods` to have compiled entry points.
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+// Compiles ProfileTestMultiDex with the fixture's profile and checks which methods got compiled.
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability. Methods rejected to be compiled will run through the
+  // dex-to-dex compiler.
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 385f34a..2644528 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@
       verbose_methods_(nullptr),
       pass_manager_options_(),
       abort_on_hard_verifier_failure_(false),
-      init_failure_output_(nullptr) {
+      init_failure_output_(nullptr),
+      dump_cfg_file_name_(""),
+      dump_cfg_append_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -71,7 +73,9 @@
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
                                  std::ostream* init_failure_output,
-                                 bool abort_on_hard_verifier_failure
+                                 bool abort_on_hard_verifier_failure,
+                                 const std::string& dump_cfg_file_name,
+                                 bool dump_cfg_append
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -94,7 +98,9 @@
     verbose_methods_(verbose_methods),
     pass_manager_options_(),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
-    init_failure_output_(init_failure_output) {
+    init_failure_output_(init_failure_output),
+    dump_cfg_file_name_(dump_cfg_file_name),
+    dump_cfg_append_(dump_cfg_append) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -238,6 +244,10 @@
     ParsePassOptions(option, Usage);
   } else if (option.starts_with("--dump-init-failures=")) {
     ParseDumpInitFailures(option, Usage);
+  } else if (option.starts_with("--dump-cfg=")) {
+    dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+  } else if (option.starts_with("--dump-cfg-append")) {
+    dump_cfg_append_ = true;
   } else {
     // Option not recognized.
     return false;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f14bdc4..d47fc2a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -83,7 +83,9 @@
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
                   std::ostream* init_failure_output,
-                  bool abort_on_hard_verifier_failure);
+                  bool abort_on_hard_verifier_failure,
+                  const std::string& dump_cfg_file_name,
+                  bool dump_cfg_append);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -224,6 +226,14 @@
 
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;  // From --dump-cfg=<file> or the constructor; "" by default.
+  }
+
+  bool GetDumpCfgAppend() const {
+    return dump_cfg_append_;  // Set by --dump-cfg-append or the constructor; false by default.
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -273,6 +283,9 @@
   // Log initialization of initialization failures to this stream if not null.
   std::unique_ptr<std::ostream> init_failure_output_;
 
+  std::string dump_cfg_file_name_;
+  bool dump_cfg_append_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8dd..35b3e15 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@
   // TODO: Arm S0–S31 register mapping is obsolescent.
   //   We should use VFP-v3/Neon D0-D31 mapping instead.
   //   However, D0 is aliased to pair of S0 and S1, so using that
-  //   mapping we can not easily say S0 is spilled and S1 is not.
+  //   mapping we cannot easily say S0 is spilled and S1 is not.
   //   There are ways around this in DWARF but they are complex.
   //   It would be much simpler to always spill whole D registers.
   //   Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5..46484b1 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@
       header_.sh_entsize = entsize;
     }
 
-    ~Section() OVERRIDE {
-      if (started_) {
-        CHECK(finished_);
-      }
-    }
-
     // Start writing of this section.
     void Start() {
       CHECK(!started_);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index dd50f69..e03614f 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -212,7 +212,7 @@
     case kNone:
       break;
   }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
   UNREACHABLE();
 }
 
@@ -653,6 +653,21 @@
             info_.EndTag();  // DW_TAG_member.
           }
 
+          if (type->IsStringClass()) {
+            // Emit debug info about an artificial class member for java.lang.String which
+            // represents the first element of the data stored in a string instance. Consumers of
+            // the debug info will be able to read the content of java.lang.String based on the
+            // count (real field) and based on the location of this data member.
+            info_.StartTag(DW_TAG_member);
+            WriteName("value");
+            // We don't support fields with C like array types so we just say its type is java char.
+            WriteLazyType("C");  // char.
+            info_.WriteUdata(DW_AT_data_member_location,
+                             mirror::String::ValueOffset().Uint32Value());
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+            info_.EndTag();  // DW_TAG_member.
+          }
+
           EndClassTag(desc);
         }
       }
@@ -883,6 +898,8 @@
         info_.EndTag();
       } else {
         // Primitive types.
+        DCHECK_EQ(desc.size(), 1u);
+
         const char* name;
         uint32_t encoding;
         uint32_t byte_size;
@@ -1226,26 +1243,8 @@
   std::vector<uintptr_t> debug_line_patches;
 };
 
-// Get all types loaded by the runtime.
-static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
-  std::vector<mirror::Class*> result;
-  class CollectClasses : public ClassVisitor {
-   public:
-    virtual bool Visit(mirror::Class* klass) {
-      classes_->push_back(klass);
-      return true;
-    }
-    std::vector<mirror::Class*>* classes_;
-  };
-  CollectClasses visitor;
-  visitor.classes_ = &result;
-  Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
-  return result;
-}
-
 template<typename ElfTypes>
 static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                               bool write_loaded_runtime_types,
                                const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
@@ -1274,19 +1273,12 @@
   }
 
   // Write .debug_info section.
-  if (!compilation_units.empty() || write_loaded_runtime_types) {
+  if (!compilation_units.empty()) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
-    if (write_loaded_runtime_types) {
-      Thread* self = Thread::Current();
-      // The lock prevents the classes being moved by the GC.
-      ReaderMutexLock mu(self, *Locks::mutator_lock_);
-      std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
-      info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
-    }
     info_writer.End();
   }
 }
@@ -1353,7 +1345,6 @@
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
-                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
   // Add methods to .symtab.
@@ -1361,7 +1352,7 @@
   // Generate CFI (stack unwinding information).
   WriteCFISection(builder, method_infos, cfi_format);
   // Write DWARF .debug_* sections.
-  WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+  WriteDebugSections(builder, method_infos);
 }
 
 template <typename ElfTypes>
@@ -1374,7 +1365,6 @@
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
   builder->Start();
   WriteDebugInfo(builder.get(),
-                 false,
                  ArrayRef<const MethodDebugInfo>(&method_info, 1),
                  DW_DEBUG_FRAME_FORMAT);
   builder->End();
@@ -1396,8 +1386,8 @@
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
-                                                                 mirror::Class* type)
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassesInternal(
+    const InstructionSet isa, const ArrayRef<mirror::Class*>& types)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
@@ -1407,7 +1397,7 @@
 
   DebugInfoWriter<ElfTypes> info_writer(builder.get());
   info_writer.Start();
-  info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+  info_writer.WriteTypes(types);
   info_writer.End();
 
   builder->End();
@@ -1419,23 +1409,22 @@
   return ArrayRef<const uint8_t>(result, buffer.size());
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
+                                                    const ArrayRef<mirror::Class*>& types) {
   if (Is64BitInstructionSet(isa)) {
-    return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+    return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, types);
   } else {
-    return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
+    return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, types);
   }
 }
 
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 91da00f..e4bc856 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -32,13 +32,13 @@
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
-                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
 ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
+                                                    const ArrayRef<mirror::Class*>& types)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
 }  // namespace dwarf
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index a67f3bd..7b1bdd7 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -152,7 +152,7 @@
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    dwarf::WriteDebugInfo(builder_.get(), /* write_types */ true, method_infos, kCFIFormat);
+    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
   }
 }
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605..12132c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,37 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0,
-                           0,
-                           0,
-                           compiler_driver_.get(),
-                           writer.get(),
-                           /*compiling_boot_image*/true,
-                           &timings,
-                           &key_value_store);
+      const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
       std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
           compiler_driver_->GetInstructionSet(),
           &compiler_driver_->GetCompilerOptions(),
           oat_file.GetFile());
-      bool success = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(success);
-
       elf_writer->Start();
-
+      OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
       OutputStream* rodata = elf_writer->StartRoData();
+      for (const DexFile* dex_file : dex_files) {
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writer.AddRawDexFileSource(raw_dex_file,
+                                       dex_file->GetLocation().c_str(),
+                                       dex_file->GetLocationChecksum());
+      }
+      std::unique_ptr<MemMap> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+          rodata,
+          oat_file.GetFile(),
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &key_value_store,
+          &opened_dex_files_map,
+          &opened_dex_files);
+      ASSERT_TRUE(dex_files_ok);
+      oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
       bool rodata_ok = oat_writer.WriteRodata(rodata);
       ASSERT_TRUE(rodata_ok);
       elf_writer->EndRoData(rodata);
@@ -123,12 +135,15 @@
       ASSERT_TRUE(text_ok);
       elf_writer->EndText(text);
 
+      bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+      ASSERT_TRUE(header_ok);
+
       elf_writer->SetBssSize(oat_writer.GetBssSize());
       elf_writer->WriteDynamicSection();
       elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
       elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
-      success = elf_writer->End();
+      bool success = elf_writer->End();
 
       ASSERT_TRUE(success);
     }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index bc51ed6..3a3275a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -28,6 +28,8 @@
 #include "dex/quick_compiler_callbacks.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "elf_writer_debug.h"
+#include "jit/debugger_interface.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "oat_file-inl.h"
@@ -65,6 +67,17 @@
   return jit_compiler->CompileMethod(self, method);
 }
 
+extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);  // |handle| is a JitCompiler*.
+  DCHECK(jit_compiler != nullptr);
+  if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {  // No-op otherwise.
+    const ArrayRef<mirror::Class*> types_array(types, count);  // Wraps the caller's array.
+    ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForClasses(kRuntimeISA, types_array);
+    CreateJITCodeEntry(std::unique_ptr<const uint8_t[]>(elf_file.data()), elf_file.size());
+  }
+}
+
 // Callers of this method assume it has NO_RETURN.
 NO_RETURN static void Usage(const char* fmt, ...) {
   va_list ap;
@@ -97,7 +110,9 @@
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
       /* init_failure_output */ nullptr,
-      /* abort_on_hard_verifier_failure */ false));
+      /* abort_on_hard_verifier_failure */ false,
+      /* dump_cfg_file_name */ "",
+      /* dump_cfg_append */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -153,8 +168,6 @@
       /* thread_count */ 1,
       /* dump_stats */ false,
       /* dump_passes */ false,
-      /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
       /* dex to oat map */ nullptr,
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index b10cc35..bf8e786 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@
         driver_(&compiler_options_, &verification_results_, &inliner_map_,
                 Compiler::kQuick, instruction_set, nullptr,
                 false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, nullptr, nullptr),
+                false, false, nullptr, -1, nullptr, nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9f7ffa5..c0d15f3 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
@@ -117,8 +118,6 @@
                                               2,
                                               true,
                                               true,
-                                              "",
-                                              false,
                                               timer_.get(),
                                               -1,
                                               nullptr,
@@ -129,23 +128,74 @@
                 const std::vector<const DexFile*>& dex_files,
                 SafeMap<std::string, std::string>& key_value_store) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(dex_files,
-                         42U,
-                         4096U,
-                         0,
-                         compiler_driver_.get(),
-                         nullptr,
-                         /*compiling_boot_image*/false,
-                         &timings,
-                         &key_value_store);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,  // Variant taking paths of dex or zip input files.
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {  // Location == filename.
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);  // Shared write path.
+  }
+
+  bool WriteElf(File* file,  // Variant taking an already-open zip file descriptor.
+                ScopedFd&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {  // Hands over the fd.
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);  // Shared write path.
+  }
+
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
         &compiler_driver_->GetCompilerOptions(),
         file);
-
     elf_writer->Start();
-
     OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    }
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
     if (!oat_writer.WriteRodata(rodata)) {
       return false;
     }
@@ -157,6 +207,10 @@
     }
     elf_writer->EndText(text);
 
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -169,6 +223,117 @@
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
+class ZipBuilder {  // Test helper that writes a stored-only (uncompressed) zip archive.
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);  // Where this entry's header starts.
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;  // Same as uncompressed: data is stored as-is.
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;  // Recorded now, written out later by Finish().
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });  // |location| must outlive Finish().
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);  // Central directory start offset.
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
+  struct PACKED(1) ZipFileHeader {  // Local file header, written before each entry's data.
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {  // One per entry, written by Finish().
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {  // Written last by Finish().
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {  // Central directory entry pending until Finish().
+    CentralDirectoryFileHeader cdfh;
+    const char* location;  // Borrowed pointer; the string is not copied.
+  };
+
+  File* zip_file_;  // The archive is written here.
+  std::vector<FileData> file_data_;  // One element per successful AddFile().
+};
+
 TEST_F(OatTest, WriteRead) {
   TimingLogger timings("OatTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -329,4 +494,189 @@
   EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
 }
 
+TEST_F(OatTest, DexFileInput) {  // Raw dex files must round-trip through the oat file.
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;  // Output oat file built from both inputs.
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr) << error_msg;  // Surface the reason on failure.
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+  ASSERT_TRUE(opened_dex_file1 != nullptr && opened_dex_file2 != nullptr) << error_msg;
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, ZipFileInput) {  // Same round-trip, with both dex files packed in one zip.
+  TimingLogger timings("OatTest::ZipFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr) << error_msg;  // Surface the reason on failure.
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+    ASSERT_TRUE(opened_dex_file1 != nullptr && opened_dex_file2 != nullptr) << error_msg;
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+
+  {
+    // Test using the AddZippedDexFilesSource() interface with the zip file handle.
+    ScopedFd zip_fd(dup(zip_file.GetFd()));
+    ASSERT_NE(-1, zip_fd.get());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr) << error_msg;  // Surface the reason on failure.
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+    ASSERT_TRUE(opened_dex_file1 != nullptr && opened_dex_file2 != nullptr) << error_msg;
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e..c74c41f0 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -49,9 +51,77 @@
 #include "type_lookup_table.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
+#include "zip_archive.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;  // 1-byte alignment.
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+    return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);  // In-place view, no copy.
+}
+
+}  // anonymous namespace
+
+// Defines the location of the raw dex file to write.
+class OatWriter::DexFileSource {  // Tagged pointer to one of three kinds of dex input.
+ public:
+  explicit DexFileSource(ZipEntry* zip_entry)
+      : type_(kZipEntry), source_(zip_entry) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(File* raw_file)
+      : type_(kRawFile), source_(raw_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(const uint8_t* dex_file)
+      : type_(kRawData), source_(dex_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  bool IsZipEntry() const { return type_ == kZipEntry; }
+  bool IsRawFile() const { return type_ == kRawFile; }
+  bool IsRawData() const { return type_ == kRawData; }
+
+  ZipEntry* GetZipEntry() const {
+    DCHECK(IsZipEntry());
+    DCHECK(source_ != nullptr);
+    return static_cast<ZipEntry*>(const_cast<void*>(source_));  // Undo the const void* storage.
+  }
+
+  File* GetRawFile() const {
+    DCHECK(IsRawFile());
+    DCHECK(source_ != nullptr);
+    return static_cast<File*>(const_cast<void*>(source_));  // Undo the const void* storage.
+  }
+
+  const uint8_t* GetRawData() const {
+    DCHECK(IsRawData());
+    DCHECK(source_ != nullptr);
+    return static_cast<const uint8_t*>(source_);
+  }
+
+  void Clear() {  // Resets to the empty (kNone) state; all Is*() then return false.
+    type_ = kNone;
+    source_ = nullptr;
+  }
+
+ private:
+  enum Type {
+    kNone,  // Only reachable through Clear(); no constructor produces it.
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
+  Type type_;  // Tags which pointer type source_ holds.
+  const void* source_;  // Type-erased pointer, interpreted according to type_.
+};
+
 class OatWriter::OatClass {
  public:
   OatClass(size_t offset,
@@ -116,11 +186,30 @@
 
 class OatWriter::OatDexFile {
  public:
-  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(const char* dex_file_location,
+             DexFileSource source,
+             CreateTypeLookupTable create_type_lookup_table);
   OatDexFile(OatDexFile&& src) = default;
 
+  const char* GetLocation() const {
+    return dex_file_location_data_;
+  }
+
+  void ReserveTypeLookupTable(OatWriter* oat_writer);
+  void ReserveClassOffsets(OatWriter* oat_writer);
+
   size_t SizeOf() const;
-  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+  bool Write(OatWriter* oat_writer, OutputStream* out) const;
+  bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+  // The source of the dex file.
+  DexFileSource source_;
+
+  // Whether to create the type lookup table.
+  CreateTypeLookupTable create_type_lookup_table_;
+
+  // Dex file size. Initialized when writing the dex file.
+  size_t dex_file_size_;
 
   // Offset of start of OatDexFile from beginning of OatHeader. It is
   // used to validate file position when writing.
@@ -128,11 +217,13 @@
 
   // Data to write.
   uint32_t dex_file_location_size_;
-  const uint8_t* dex_file_location_data_;
+  const char* dex_file_location_data_;
   uint32_t dex_file_location_checksum_;
   uint32_t dex_file_offset_;
+  uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
-  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+
+  // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
  private:
@@ -151,26 +242,20 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     bool compiling_boot_image,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-  : compiler_driver_(compiler),
-    image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+  : write_state_(WriteState::kAddingDexFileSources),
+    timings_(timings),
+    raw_dex_files_(),
+    zip_archives_(),
+    zipped_dex_files_(),
+    zipped_dex_file_locations_(),
+    compiler_driver_(nullptr),
+    image_writer_(nullptr),
     compiling_boot_image_(compiling_boot_image),
-    dex_files_(&dex_files),
+    dex_files_(nullptr),
     size_(0u),
     bss_size_(0u),
     oat_data_offset_(0u),
-    image_file_location_oat_checksum_(image_file_location_oat_checksum),
-    image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_patch_delta_(image_patch_delta),
-    key_value_store_(key_value_store),
     oat_header_(nullptr),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
@@ -197,55 +282,192 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
+    size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
-    size_oat_dex_file_class_offsets_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
+    size_oat_class_offsets_alignment_(0),
+    size_oat_class_offsets_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
     method_offset_map_() {
-  CHECK(key_value_store != nullptr);
-  if (compiling_boot_image) {
-    CHECK(image_writer != nullptr);
+}
+
+// Add dex file source(s) from a file: either a raw dex file or a zip archive of dex files.
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "': "
+                << error_msg;
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // Opened read-only, so the File destructor may close it: pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(
+        location, DexFileSource(raw_dex_files_.back().get()), create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+        << error_msg;
+    return false;
+  }
+  size_t num_entries = 0u;  // Entries found in this archive only (lists may hold earlier zips).
+  for (; ; ++num_entries) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(num_entries);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(num_entries, location));
+    oat_dex_files_.emplace_back(zipped_dex_file_locations_.back().c_str(),
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (num_entries == 0u) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
+  }
+  return true;
+}
+
+// Register a dex file held in memory as a source; `data` must cover the whole dex file.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  const uint8_t* const raw_dex = data.data();
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_dex, location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* dex_header = AsUnalignedDexFileHeader(raw_dex);
+  if (dex_header->file_size_ > data.size()) {
+    LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+               << " file size from header: " << dex_header->file_size_ << " File: " << location;
+    return false;
+  }
+  oat_dex_files_.emplace_back(location, DexFileSource(raw_dex), create_type_lookup_table);
+  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+  dchecked_vector<const char*> result;  // One location per source, in insertion order.
+  result.reserve(oat_dex_files_.size());
+  for (size_t idx = 0u, end = oat_dex_files_.size(); idx != end; ++idx) {
+    result.push_back(oat_dex_files_[idx].GetLocation());
+  }
+  return result;
+}
+
+bool OatWriter::WriteAndOpenDexFiles(
+    OutputStream* rodata,
+    File* file,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    SafeMap<std::string, std::string>* key_value_store,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  CHECK(write_state_ == WriteState::kAddingDexFileSources);
+  // Lay out the oat header and OatDexFile records, write the dex files, then open them.
+  size_t offset = InitOatHeader(instruction_set,
+                                instruction_set_features,
+                                dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                                key_value_store);
+  offset = InitOatDexFiles(offset);
+  size_ = offset;
+
+  std::unique_ptr<MemMap> dex_files_map;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!WriteDexFiles(rodata, file)) {
+    return false;
+  }
+  // Reserve space for type lookup tables and update lookup_table_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveTypeLookupTable(this);
+  }
+  size_t size_after_type_lookup_tables = size_;
+  // Reserve space for class offsets and update class_offsets_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveClassOffsets(this);
+  }
+  if (!WriteOatDexFiles(rodata) ||
+      !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+      !OpenDexFiles(file, &dex_files_map, &dex_files) ||
+      !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+    return false;
+  }
+  // Success: only now are the out-parameters assigned and the write state advanced.
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  write_state_ = WriteState::kPrepareLayout;
+  return true;
+}
+
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+                              ImageWriter* image_writer,
+                              const std::vector<const DexFile*>& dex_files) {
+  CHECK(write_state_ == WriteState::kPrepareLayout);
+
+  dex_files_ = &dex_files;
+
+  compiler_driver_ = compiler;
+  image_writer_ = image_writer;
+  if (compiling_boot_image_) {
+    CHECK(image_writer_ != nullptr);
   }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+  CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
                                                       &method_offset_map_);
 
-  size_t offset;
+  uint32_t offset = size_;
   {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitLookupTables", timings);
-    offset = InitLookupTables(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
@@ -255,7 +477,7 @@
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
-    for (const DexFile* dex_file : dex_files) {
+    for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
@@ -265,9 +487,10 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
     CHECK_EQ(image_writer_ != nullptr,
-             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
   }
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
@@ -1134,59 +1357,26 @@
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                      compiler_driver_->GetInstructionSetFeatures(),
-                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
-                                      key_value_store_));
-  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
-  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    oat_dex_files_.emplace_back(offset, *dex_file);
-    offset += oat_dex_files_.back().SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i].dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-
-    // Initialize type lookup table
-    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
-    offset += dex_file->GetHeader().file_size_;
-  }
-  return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize offsets of dex files.
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file.lookup_table_ != nullptr) {
-      uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file.lookup_table_offset_ = aligned_offset;
-      size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
-    } else {
-      oat_dex_file.lookup_table_offset_ = 0;
-    }
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
   }
   return offset;
 }
@@ -1239,7 +1429,6 @@
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
   if (compiler_driver_->IsBootImage()) {
-    CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
@@ -1264,7 +1453,6 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
-    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
@@ -1289,22 +1477,15 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  if (!GetOatDataOffset(out)) {
+  CHECK(write_state_ == WriteState::kWriteRoData);
+
+  if (!WriteClassOffsets(out)) {
+    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
-  const size_t file_offset = oat_data_offset_;
 
-  // Reserve space for header. It will be written last - after updating the checksum.
-  size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_ += sizeof(OatHeader);
-  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
-
-  if (!WriteTables(out, file_offset)) {
-    LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+  if (!WriteClasses(out)) {
+    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
@@ -1313,6 +1494,7 @@
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
+  size_t file_offset = oat_data_offset_;
   size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
@@ -1332,11 +1514,13 @@
   }
   DCHECK_OFFSET();
 
+  write_state_ = WriteState::kWriteText;
   return true;
 }
 
 bool OatWriter::WriteCode(OutputStream* out) {
-  size_t header_size = oat_header_->GetHeaderSize();
+  CHECK(write_state_ == WriteState::kWriteText);
+
   const size_t file_offset = oat_data_offset_;
   size_t relative_offset = oat_header_->GetExecutableOffset();
   DCHECK_OFFSET();
@@ -1390,10 +1574,12 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
-    DO_STAT(size_oat_dex_file_class_offsets_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
+    DO_STAT(size_oat_class_offsets_alignment_);
+    DO_STAT(size_oat_class_offsets_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
@@ -1408,88 +1594,90 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
-  // Finalize the header checksum.
+  write_state_ = WriteState::kWriteHeader;
+  return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+                            uint32_t image_file_location_oat_checksum,
+                            uintptr_t image_file_location_oat_begin,
+                            int32_t image_patch_delta) {
+  CHECK(write_state_ == WriteState::kWriteHeader);
+
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+  if (compiler_driver_->IsBootImage()) {
+    CHECK_EQ(image_patch_delta, 0);
+    CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+  } else {
+    CHECK_ALIGNED(image_patch_delta, kPageSize);
+    oat_header_->SetImagePatchDelta(image_patch_delta);
+  }
   oat_header_->UpdateChecksumWithHeaderData();
 
-  // Write the header now that the checksum is final.
+  const size_t file_offset = oat_data_offset_;
+
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+    return false;
+  }
   if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
+
+  // Flush all other data before writing the header.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+    return false;
+  }
+  // Write the header.
+  size_t header_size = oat_header_->GetHeaderSize();
   if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+  // Flush the header data.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
 
+  if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+
+  write_state_ = WriteState::kDone;
   return true;
 }
 
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
-      return false;
-    }
-  }
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    const DexFile* dex_file = (*dex_files_)[i];
-    if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
-      PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
-                  << " to " << out->GetLocation();
-      return false;
-    }
-    size_dex_file_ += dex_file->GetHeader().file_size_;
-  }
-  if (!WriteLookupTables(out, file_offset)) {
-    return false;
-  }
-  for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
-      return false;
-    }
-  }
-  return true;
-}
-
-bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
-    DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
-    if (lookup_table_offset == 0) {
-      continue;
-    }
-    const uint32_t expected_offset = file_offset + lookup_table_offset;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    if (table != nullptr) {
-      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
-        const DexFile* dex_file = (*dex_files_)[i];
-        PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
-                    << " to " << out->GetLocation();
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.class_offsets_offset_ != 0u) {
+      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
         return false;
       }
-      size_oat_lookup_table_ += table->RawDataLength();
+      if (!oat_dex_file.WriteClassOffsets(this, out)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool OatWriter::WriteClasses(OutputStream* out) {
+  for (OatClass& oat_class : oat_classes_) {
+    if (!oat_class.Write(this, out, oat_data_offset_)) {
+      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
+      return false;
     }
   }
   return true;
@@ -1585,6 +1773,455 @@
   return true;
 }
 
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+  // Read the dex file header from the current position of `file` and minimally verify it.
+  uint8_t raw_header[sizeof(DexFile::Header)];
+  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header."
+                << " File: " << oat_dex_file->GetLocation() << " Read from: " << file->GetPath();
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+    return false;
+  }
+  // Record the size, checksum and class count from the header in the OatDexFile.
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+// Minimal sanity check of a raw dex header: magic, version and a plausible file size.
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFile::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFile::IsVersionValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (AsUnalignedDexFileHeader(raw_header)->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+  TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+  // Writes every added dex file source to the output, then releases all the sources.
+  // Get the elf file offset of the oat file.
+  if (!GetOatDataOffset(rodata)) {
+    return false;
+  }
+
+  // Write each dex file in the order the sources were added; stop at the first failure.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+      return false;
+    }
+  }
+
+  // Close sources: clear the per-file references, then destroy the objects they referred to.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.source_.Clear();  // Get rid of the reference, it's about to be invalidated.
+  }
+  zipped_dex_files_.clear();
+  zip_archives_.clear();
+  raw_dex_files_.clear();
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+  // Position the output, then copy the dex file with the writer matching its source kind.
+  if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+    return false;
+  }
+  auto& source = oat_dex_file->source_;
+  bool written = false;
+  if (source.IsZipEntry()) {
+    written = WriteDexFile(rodata, file, oat_dex_file, source.GetZipEntry());
+  } else if (source.IsRawFile()) {
+    written = WriteDexFile(rodata, file, oat_dex_file, source.GetRawFile());
+  } else {
+    DCHECK(source.IsRawData());
+    written = WriteDexFile(rodata, oat_dex_file, source.GetRawData());
+  }
+  if (!written) {
+    return false;
+  }
+
+  // The dex file starts at the current size; grow the size and the stat by its length.
+  DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+  size_ += oat_dex_file->dex_file_size_;
+  size_dex_file_ += oat_dex_file->dex_file_size_;
+  return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+  // Dex files are required to be 4 byte aligned; the padding is added to the alignment stat.
+  size_t original_offset = size_;
+  size_t offset = RoundUp(original_offset, 4);
+  size_dex_file_alignment_ += offset - original_offset;
+
+  // Seek to the start of the dex file and flush any pending operations in the stream.
+  // Verify that, after flushing the stream, the file is at the same offset as the stream.
+  uint32_t start_offset = oat_data_offset_ + offset;
+  off_t actual_offset = out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  // Success: commit the aligned offset as the current size and as this dex file's offset.
+  size_ = offset;
+  oat_dex_file->dex_file_offset_ = offset;
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             ZipEntry* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+  // Writes a dex file from a zip entry: extract into `file`, verify the header, fix offsets.
+  // Extract the dex file and derive the extracted size from the resulting file position.
+  std::string error_msg;
+  if (!dex_file->ExtractToFile(*file, &error_msg)) {
+    LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+  if (extracted_end == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (extracted_end < static_cast<off_t>(start_offset)) {
+    LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+               << " Start: " << start_offset
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+  if (extracted_size < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+               << extracted_size << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Seek back, read the dex file header and store the required data in the OatDexFile.
+  off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(file, oat_dex_file)) {
+    return false;
+  }
+  if (extracted_size < oat_dex_file->dex_file_size_) {
+    LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+               << " file size from header: " << oat_dex_file->dex_file_size_
+               << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Override the checksum from header with the CRC from ZIP entry.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+  // Seek both file and stream to the end offset given by the header's file size.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // If we extracted more than the size specified in the header, truncate the file.
+  if (extracted_size > oat_dex_file->dex_file_size_) {
+    if (file->SetLength(end_offset) != 0) {
+      PLOG(ERROR) << "Failed to truncate excessive dex file length."
+                  << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             File* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+  // Writes a dex file from an opened raw dex file: read its header first, then copy it.
+  off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+  if (input_offset != static_cast<off_t>(0)) {
+    PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+                << " Expected: 0"
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+    return false;
+  }
+
+  // Copy the input dex file (dex_file_size_ bytes from offset 0) via File::Copy().
+  if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+    PLOG(ERROR) << "Failed to copy dex file to oat file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // Check file position and seek the stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             OatDexFile* oat_dex_file,
+                             const uint8_t* dex_file) {
+  // Note: By this point the raw data is known to contain the header
+  // and as many bytes as the header gives for the file size.
+  DCHECK(dex_file != nullptr);
+  DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+  const UnalignedDexFileHeader* raw_header = AsUnalignedDexFileHeader(dex_file);
+
+  if (!rodata->WriteFully(dex_file, raw_header->file_size_)) {
+    PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+                << " to " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex file."
+                << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Size the class offsets by the header's class def count and record the dex file size.
+  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  oat_dex_file->class_offsets_.resize(raw_header->class_defs_size_);
+  oat_dex_file->dex_file_size_ = raw_header->file_size_;
+  return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  // Position the stream just past the OatHeader, where the OatDexFiles begin.  When there
+  // are no OatDexFiles, nothing is written to .rodata before WriteHeader(), so this
+  // Seek() is what reserves the space for the OatHeader in .rodata.
+  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+  const uint32_t table_start = oat_data_offset_ + oat_header_->GetHeaderSize();
+  const off_t seek_result = rodata->Seek(table_start, kSeekSet);
+  if (table_start != static_cast<uint32_t>(seek_result)) {
+    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << seek_result
+                << " Expected: " << table_start << " File: " << rodata->GetLocation();
+    return false;
+  }
+
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // The stream must already be at the offset assigned to this entry.
+    DCHECK_EQ(oat_data_offset_ + oat_dex_file.offset_,
+              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+    // Write OatDexFile.
+    const bool written = oat_dex_file.Write(this, rodata);
+    if (!written) {
+      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+  TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+  // Set the output file length to oat_data_offset_ + offset and seek the stream to that position.
+  int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+  if (file->SetLength(new_length) != 0) {
+    PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+                << " File: " << file->GetPath();
+    return false;
+  }
+  off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+  if (actual_offset != static_cast<off_t>(new_length)) {
+    PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+                << " Actual: " << actual_offset << " Expected: " << new_length
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::OpenDexFiles(
+    File* file,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+  // Maps the dex file data written to `file` and opens every dex file from that mapping.
+  if (oat_dex_files_.empty()) {
+    // Nothing to do.
+    return true;
+  }
+
+  size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+  size_t length = size_ - map_offset;
+  std::string error_msg;
+  std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+                                                        PROT_READ | PROT_WRITE,
+                                                        MAP_SHARED,
+                                                        file->Fd(),
+                                                        oat_data_offset_ + map_offset,
+                                                        /* low_4gb */ false,
+                                                        file->GetPath().c_str(),
+                                                        &error_msg));
+  if (dex_files_map == nullptr) {
+    LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+               << " error: " << error_msg;
+    return false;
+  }
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // Make sure no one messed with input files while we were copying data.
+    // At the very least we need consistent file size and number of class definitions.
+    const uint8_t* raw_dex_file =
+        dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+      // Note: ValidateDexFileHeader() already logged an error message.
+      LOG(ERROR) << "Failed to verify written dex file header!"
+          << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+          << " ~ " << static_cast<const void*>(raw_dex_file);
+      return false;
+    }
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    if (header->file_size_ != oat_dex_file.dex_file_size_) {
+      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+          << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    // Now, open the dex file.
+    dex_files.emplace_back(DexFile::Open(raw_dex_file,
+                                         oat_dex_file.dex_file_size_,
+                                         oat_dex_file.GetLocation(),
+                                         oat_dex_file.dex_file_location_checksum_,
+                                         /* oat_dex_file */ nullptr,
+                                         &error_msg));
+    if (dex_files.back() == nullptr) {
+      LOG(ERROR) << "Failed to open dex file from oat file. File: " << oat_dex_file.GetLocation()
+                 << " error: " << error_msg;
+      return false;
+    }
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);  // Set only once every dex file opened.
+  *opened_dex_files = std::move(dex_files);
+  return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+    MemMap* opened_dex_files_map,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  for (size_t index = 0u; index != opened_dex_files.size(); ++index) {
+    const OatDexFile& entry = oat_dex_files_[index];
+    if (entry.lookup_table_offset_ == 0u) {
+      continue;  // A zero offset means there is no table to create for this dex file.
+    }
+    DCHECK(entry.create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+    DCHECK_NE(entry.class_offsets_.size(), 0u);
+    const size_t base = oat_dex_files_[0].dex_file_offset_;
+    const size_t table_offset = entry.lookup_table_offset_;
+    uint8_t* const table_data = opened_dex_files_map->Begin() + (table_offset - base);
+    opened_dex_files[index]->CreateTypeLookupTable(table_data);
+  }
+
+  DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+  const bool sync_failed = opened_dex_files_map != nullptr && !opened_dex_files_map->Sync();
+  if (sync_failed) {
+    PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+  }
+  return !sync_failed;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1611,15 +2248,20 @@
   }
 }
 
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
-  offset_ = offset;
-  const std::string& location(dex_file.GetLocation());
-  dex_file_location_size_ = location.size();
-  dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
-  dex_file_location_checksum_ = dex_file.GetLocationChecksum();
-  dex_file_offset_ = 0;
-  lookup_table_offset_ = 0;
-  class_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+                                  DexFileSource source,
+                                  CreateTypeLookupTable create_type_lookup_table)
+    : source_(source),
+      create_type_lookup_table_(create_type_lookup_table),
+      dex_file_size_(0),
+      offset_(0),
+      dex_file_location_size_(strlen(dex_file_location)),
+      dex_file_location_data_(dex_file_location),
+      dex_file_location_checksum_(0u),
+      dex_file_offset_(0u),
+      class_offsets_offset_(0u),
+      lookup_table_offset_(0u),
+      class_offsets_() {
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1627,24 +2269,54 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
-          + sizeof(lookup_table_offset_)
-          + (sizeof(class_offsets_[0]) * class_offsets_.size());
+          + sizeof(class_offsets_offset_)
+          + sizeof(lookup_table_offset_);
 }
 
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
-                                  OutputStream* out,
-                                  const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+  DCHECK_EQ(lookup_table_offset_, 0u);
+  const bool create = create_type_lookup_table_ == CreateTypeLookupTable::kCreate;
+  const size_t count = class_offsets_.size();
+  const size_t table_size = (create && count != 0u) ? TypeLookupTable::RawDataLength(count) : 0u;
+  if (table_size != 0u) {
+    // Type tables are required to be 4 byte aligned; the padding is tallied separately.
+    const size_t unaligned = oat_writer->size_;
+    const size_t aligned = RoundUp(unaligned, 4);
+    oat_writer->size_oat_lookup_table_alignment_ += aligned - unaligned;
+    lookup_table_offset_ = aligned;
+    oat_writer->size_ = aligned + table_size;
+    oat_writer->size_oat_lookup_table_ += table_size;
+  }
+}
+
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+  DCHECK_EQ(class_offsets_offset_, 0u);
+  if (class_offsets_.empty()) {
+    return;  // No class offsets to store, so nothing is reserved.
+  }
+  const size_t unaligned = oat_writer->size_;
+  const size_t aligned = RoundUp(unaligned, 4);  // Class offsets must be 4 byte aligned.
+  oat_writer->size_oat_class_offsets_alignment_ += aligned - unaligned;
+  class_offsets_offset_ = aligned;
+  oat_writer->size_ = aligned + GetClassOffsetsRawSize();
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+  const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
+
   if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
   if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
   if (!oat_writer->WriteData(out,
                              &dex_file_location_checksum_,
                              sizeof(dex_file_location_checksum_))) {
@@ -1652,21 +2324,35 @@
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
   if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+
+  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+    PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
   if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+  return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
   if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
-    PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+    PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+                << " to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
+  oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5feb5fc..d681998 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -27,7 +27,9 @@
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
+#include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -39,6 +41,7 @@
 class OutputStream;
 class TimingLogger;
 class TypeLookupTable;
+class ZipEntry;
 
 namespace dwarf {
 struct MethodDebugInfo;
@@ -61,6 +64,11 @@
 // ...
 // TypeLookupTable[D]
 //
+// ClassOffsets[0]   one table of OatClass offsets for each class def for each OatDexFile.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -93,15 +101,65 @@
 //
 class OatWriter {
  public:
-  OatWriter(const std::vector<const DexFile*>& dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uintptr_t image_file_location_oat_begin,
-            int32_t image_patch_delta,
-            const CompilerDriver* compiler,
-            ImageWriter* image_writer,
-            bool compiling_boot_image,
-            TimingLogger* timings,
-            SafeMap<std::string, std::string>* key_value_store);
+  enum class CreateTypeLookupTable {
+    kCreate,
+    kDontCreate,
+    kDefault = kCreate
+  };
+
+  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+  // To produce a valid oat file, the user must first add sources with any combination of
+  //   - AddDexFileSource(),
+  //   - AddZippedDexFilesSource(),
+  //   - AddRawDexFileSource().
+  // Then the user must call in order
+  //   - WriteAndOpenDexFiles(),
+  //   - PrepareLayout(),
+  //   - WriteRodata(),
+  //   - WriteCode(),
+  //   - WriteHeader().
+
+  // Add dex file source(s) from a file, either a plain dex file or
+  // a zip file with one or more dex files.
+  bool AddDexFileSource(
+      const char* filename,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a zip file specified by a file handle.
+  bool AddZippedDexFilesSource(
+      ScopedFd&& zip_fd,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source from raw memory.
+  bool AddRawDexFileSource(
+      const ArrayRef<const uint8_t>& data,
+      const char* location,
+      uint32_t location_checksum,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  dchecked_vector<const char*> GetSourceLocations() const;
+
+  // Write raw dex files to the .rodata section and open them from the oat file.
+  bool WriteAndOpenDexFiles(OutputStream* rodata,
+                            File* file,
+                            InstructionSet instruction_set,
+                            const InstructionSetFeatures* instruction_set_features,
+                            SafeMap<std::string, std::string>* key_value_store,
+                            /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                            /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  // Prepare layout of remaining data.
+  void PrepareLayout(const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
+                     const std::vector<const DexFile*>& dex_files);
+  // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+  bool WriteRodata(OutputStream* out);
+  // Write the code to the .text section.
+  bool WriteCode(OutputStream* out);
+  // Write the oat header. This finalizes the oat file.
+  bool WriteHeader(OutputStream* out,
+                   uint32_t image_file_location_oat_checksum,
+                   uintptr_t image_file_location_oat_begin,
+                   int32_t image_patch_delta);
 
   // Returns whether the oat file has an associated image.
   bool HasImage() const {
@@ -130,9 +188,6 @@
     return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
-  bool WriteRodata(OutputStream* out);
-  bool WriteCode(OutputStream* out);
-
   ~OatWriter();
 
   ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
@@ -144,6 +199,7 @@
   }
 
  private:
+  class DexFileSource;
   class OatClass;
   class OatDexFile;
 
@@ -174,29 +230,65 @@
   // with a given DexMethodVisitor.
   bool VisitDexMethods(DexMethodVisitor* visitor);
 
-  size_t InitOatHeader();
+  size_t InitOatHeader(InstructionSet instruction_set,
+                       const InstructionSetFeatures* instruction_set_features,
+                       uint32_t num_dex_files,
+                       SafeMap<std::string, std::string>* key_value_store);
   size_t InitOatDexFiles(size_t offset);
-  size_t InitLookupTables(size_t offset);
-  size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
   size_t InitOatCode(size_t offset);
   size_t InitOatCodeDexFiles(size_t offset);
 
-  bool WriteTables(OutputStream* out, const size_t file_offset);
-  bool WriteLookupTables(OutputStream* out, const size_t file_offset);
+  bool WriteClassOffsets(OutputStream* out);
+  bool WriteClasses(OutputStream* out);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
   bool GetOatDataOffset(OutputStream* out);
+  bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+  bool WriteDexFiles(OutputStream* rodata, File* file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+  bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+  bool WriteOatDexFiles(OutputStream* rodata);
+  bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+  bool OpenDexFiles(File* file,
+                    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+                             const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteData(OutputStream* out, const void* data, size_t size);
 
+  enum class WriteState {
+    kAddingDexFileSources,
+    kPrepareLayout,
+    kWriteRoData,
+    kWriteText,
+    kWriteHeader,
+    kDone
+  };
+
+  WriteState write_state_;
+  TimingLogger* timings_;
+
+  std::vector<std::unique_ptr<File>> raw_dex_files_;
+  std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+  std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
+
+  // Using std::list<> which doesn't move elements around on push/emplace_back().
+  // We need this because we keep plain pointers to the strings' c_str().
+  std::list<std::string> zipped_dex_file_locations_;
+
   dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
-  const CompilerDriver* const compiler_driver_;
-  ImageWriter* const image_writer_;
+  const CompilerDriver* compiler_driver_;
+  ImageWriter* image_writer_;
   const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
@@ -215,13 +307,7 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
-  // dependencies on the image.
-  uint32_t image_file_location_oat_checksum_;
-  uintptr_t image_file_location_oat_begin_;
-  int32_t image_patch_delta_;
-
   // data to write
-  SafeMap<std::string, std::string>* key_value_store_;
   std::unique_ptr<OatHeader> oat_header_;
   dchecked_vector<OatDexFile> oat_dex_files_;
   dchecked_vector<OatClass> oat_classes_;
@@ -257,10 +343,12 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
+  uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
-  uint32_t size_oat_dex_file_class_offsets_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
+  uint32_t size_oat_class_offsets_alignment_;
+  uint32_t size_oat_class_offsets_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
@@ -269,7 +357,7 @@
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
   // The locations of absolute patches relative to the start of the executable section.
-  std::vector<uintptr_t> absolute_patch_locations_;
+  dchecked_vector<uintptr_t> absolute_patch_locations_;
 
   // Map method reference to assigned offset.
   // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index b9df686..6dc5320 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -29,13 +29,13 @@
   BoundsCheckElimination(HGraph* graph,
                          const SideEffectsAnalysis& side_effects,
                          HInductionVarAnalysis* induction_analysis)
-      : HOptimization(graph, kBoundsCheckEliminiationPassName),
+      : HOptimization(graph, kBoundsCheckEliminationPassName),
         side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
 
-  static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
+  static constexpr const char* kBoundsCheckEliminationPassName = "BCE";
 
  private:
   const SideEffectsAnalysis& side_effects_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1af6846..3721813 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1271,44 +1271,14 @@
 
   // Add move-result for StringFactory method.
   uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* fake_string = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, fake_string);
+  HInstruction* new_instance = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
+  invoke->SetArgumentAt(argument_index, new_instance);
   current_block_->AddInstruction(invoke);
-  PotentiallySimplifyFakeString(orig_this_reg, invoke->GetDexPc(), invoke);
 
   latest_result_ = invoke;
-
   return true;
 }
 
-void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                                  uint32_t dex_pc,
-                                                  HInvoke* actual_string) {
-  if (!graph_->IsDebuggable()) {
-    // Notify that we cannot compile with baseline. The dex registers aliasing
-    // with `original_dex_register` will be handled when we optimize
-    // (see HInstructionSimplifer::VisitFakeString).
-    can_use_baseline_for_string_init_ = false;
-    return;
-  }
-  const VerifiedMethod* verified_method =
-      compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
-  if (verified_method != nullptr) {
-    UpdateLocal(original_dex_register, actual_string, dex_pc);
-    const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
-        verified_method->GetStringInitPcRegMap();
-    auto map_it = string_init_map.find(dex_pc);
-    if (map_it != string_init_map.end()) {
-      for (uint32_t reg : map_it->second) {
-        HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc);
-        UpdateLocal(reg, load_local, dex_pc);
-      }
-    }
-  } else {
-    can_use_baseline_for_string_init_ = false;
-  }
-}
-
 static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
   const char* type = dex_file.GetFieldTypeDescriptor(field_id);
@@ -2698,18 +2668,10 @@
     }
 
     case Instruction::NEW_INSTANCE: {
-      uint16_t type_index = instruction.VRegB_21c();
-      if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
-        int32_t register_index = instruction.VRegA();
-        HFakeString* fake_string = new (arena_) HFakeString(dex_pc);
-        current_block_->AddInstruction(fake_string);
-        UpdateLocal(register_index, fake_string, dex_pc);
-      } else {
-        if (!BuildNewInstance(type_index, dex_pc)) {
-          return false;
-        }
-        UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
       }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cb..1d604e7 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@
         return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
         interpreter_metadata_(interpreter_metadata),
         dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@
         return_type_(return_type),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
         null_dex_cache_(),
@@ -85,10 +83,6 @@
 
   bool BuildGraph(const DexFile::CodeItem& code);
 
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
-
   static constexpr const char* kBuilderPassName = "builder";
 
   // The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@
   // used by move-result instructions.
   HInstruction* latest_result_;
 
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
-
   OptimizingCompilerStats* compilation_stats_;
 
   const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9ec..a3bbfdb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@
   return pointer_size * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                             + GetGraph()->GetTemporariesVRegSlots()
-                             + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                             + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
-}
-
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@
   current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
 
@@ -242,9 +229,6 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
-      }
       DCHECK(CheckTypeConsistency(current));
       current->Accept(instruction_visitor);
     }
@@ -254,7 +238,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
 
@@ -262,14 +246,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
                                              size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a core register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
-          ->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
-  }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
 size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd8..4f8f146 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@
 
 class CodeGenerator {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
   static CodeGenerator* Create(HGraph* graph,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@
 
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
   void BuildStackMaps(MemoryRegion region);
   size_t ComputeStackMapsSize();
 
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
-
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -489,7 +479,6 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -502,15 +491,6 @@
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d64b878..f265a0c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal and ordered" (ARM NE is also true when unordered).
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -815,58 +831,7 @@
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -881,15 +846,7 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -919,11 +876,10 @@
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on thumb2, so we always
+  // spill and restore one extra, fixed register (kCoreAlwaysSpillRegister).
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@
 
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3651,20 +3601,34 @@
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, LR, temp, code_offset.Int32Value());
+    __ blx(LR);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3789,6 +3753,7 @@
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3799,6 +3764,7 @@
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
@@ -3811,14 +3777,15 @@
                  FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
       }
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -5516,7 +5483,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5726,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6013,6 +5980,7 @@
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6091,11 +6059,8 @@
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
@@ -6484,6 +6449,33 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
+void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
+  Register accumulator = locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex)
+                                      .AsRegister<Register>();
+  Register mul_left = locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex)
+                                  .AsRegister<Register>();
+  Register mul_right = locations->InAt(HMultiplyAccumulate::kInputMulRightIndex)
+                                    .AsRegister<Register>();
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ mla(res, mul_left, mul_right, accumulator);
+  } else {
+    __ mls(res, mul_left, mul_right, accumulator);
+  }
+}
+
 void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
@@ -6494,18 +6486,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63..df2126c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,6 +159,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -197,6 +198,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -340,9 +342,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -444,7 +444,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -452,7 +452,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a3150d3..3fdd7186 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express every branch needed after an Fcmp (see the "Meaning
+  // (floating-point)" column of table C1-1 in the ARMv8 reference manual); codes tagged
+  // "unordered" below also hold for NaN operands. No dex instruction or HIR needs the missing
+  // conditions "equal or unordered" or "ordered and not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -604,30 +622,13 @@
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
             instruction_->GetLocations()->Intrinsified()));
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
-    // Note: In the case of a HArrayGet instruction, when the base
-    // address is a HArm64IntermediateAddress instruction, it does not
-    // point to the array object itself, but to an offset within this
-    // object. However, the read barrier entry point needs the array
-    // object address to be passed as first argument. So we
-    // temporarily set back `obj_` to that address, and restore its
-    // initial value later.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Sub(obj_reg, obj_reg, offset_);
-    }
-
     SaveLiveRegisters(codegen, locations);
 
     // We may have to change the index's value, but as `index_` is a
@@ -728,22 +729,6 @@
 
     RestoreLiveRegisters(codegen, locations);
 
-    // Restore the value of `obj_` when it corresponds to a
-    // HArm64IntermediateAddress instruction.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Add(obj_reg, obj_reg, offset_);
-    }
-
     __ B(GetExitLabel());
   }
 
@@ -1032,13 +1017,6 @@
   Primitive::Type type = instruction->GetType();
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (instruction->IsFakeString()) {
-    // The fake string is an alias for null.
-    DCHECK(IsBaseline());
-    instruction = locations->Out().GetConstant();
-    DCHECK(instruction->IsNullConstant()) << instruction->DebugName();
-  }
-
   if (instruction->IsCurrentMethod()) {
     MoveLocation(location,
                  Location::DoubleStackSlot(kCurrentMethodStackOffset),
@@ -1134,7 +1112,7 @@
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //      lr        : Runtime reserved.
   //      tr        : Runtime reserved.
@@ -1155,40 +1133,17 @@
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1977,6 +1932,9 @@
 }
 
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1986,26 +1944,35 @@
 
 void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
     HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
-                     Location::RequiresRegister());
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+  if (instr->GetOpKind() == HInstruction::kSub &&
+      accumulator->IsConstant() &&
+      accumulator->AsConstant()->IsZero()) {
+    // A zero accumulator needs no register: this case is emitted as Mneg rather than Msub.
+  } else {
+    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                       Location::RequiresRegister());
+  }
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   Register res = OutputRegister(instr);
-  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
-  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
-  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
 
   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
   // This fixup should be carried out for all multiply-accumulate instructions:
@@ -2025,10 +1992,18 @@
   }
 
   if (instr->GetOpKind() == HInstruction::kAdd) {
+    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
     __ Madd(res, mul_left, mul_right, accumulator);
   } else {
     DCHECK(instr->GetOpKind() == HInstruction::kSub);
-    __ Msub(res, mul_left, mul_right, accumulator);
+    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) {
+      __ Mneg(res, mul_left, mul_right);
+    } else {
+      Register accumulator = InputRegisterAt(instr,
+                                             HMultiplyAccumulate::kInputAccumulatorIndex);
+      __ Msub(res, mul_left, mul_right, accumulator);
+    }
   }
 }
 
@@ -2074,6 +2049,9 @@
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      // The read barrier instrumentation does not support the
+      // HArm64IntermediateAddress instruction yet.
+      DCHECK(!kEmitCompilerReadBarrier);
       // We do not need to compute the intermediate address from the array: the
       // input instruction has done it already. See the comment in
       // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2100,11 +2078,6 @@
     if (index.IsConstant()) {
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
     } else {
-      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
-      // not contain the base address of the array object, which is
-      // needed by the read barrier entry point. So the read barrier
-      // slow path will temporarily set back `obj_loc` to the right
-      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
     }
   }
@@ -2168,6 +2141,9 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2414,12 +2390,8 @@
       } else {
         __ Fcmp(left, InputFPRegisterAt(compare, 1));
       }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -2455,7 +2427,6 @@
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
     FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2466,20 +2437,13 @@
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
     }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -2849,15 +2813,11 @@
       } else {
         __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
       }
-      if (condition->IsFPConditionTrueIfNaN()) {
-        __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
-      } else if (condition->IsFPConditionFalseIfNaN()) {
-        __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
-      }
       if (true_target == nullptr) {
-        __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+        IfCondition opposite_condition = condition->GetOppositeCondition();
+        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
       } else {
-        __ B(ARM64Condition(condition->GetCondition()), true_target);
+        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
       }
     } else {
       // Integer cases.
@@ -2874,7 +2834,8 @@
         non_fallthrough_target = true_target;
       }
 
-      if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+          rhs.IsImmediate() && (rhs.immediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -3495,9 +3456,9 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -3745,9 +3706,9 @@
 
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4061,19 +4022,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(LocationFrom(kArtMethodRegister));
+  } else {
+    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Location temp = instruction->GetLocations()->GetTemp(0);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+    __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
+    __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
+    __ Blr(lr);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -4559,18 +4534,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderARM64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorARM64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff894..98303f6 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -195,6 +195,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -245,6 +246,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -339,10 +341,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 3229129..5bd136a3 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1072,16 +1072,6 @@
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1096,52 +1086,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -3835,9 +3779,9 @@
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -4357,19 +4301,34 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  codegen_->InvokeRuntime(
-      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
-      instruction,
-      instruction->GetDexPc(),
-      nullptr,
-      IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+    __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(
+        GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+        instruction,
+        instruction->GetDexPc(),
+        nullptr,
+        IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
@@ -5222,18 +5181,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851..2cde0ed 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 38c32ca..0505486 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
+  // TODO: remove these two loops once the issues with register saving/restoring are sorted out.
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     blocked_core_registers_[kCoreCalleeSaves[i]] = true;
   }
@@ -1014,20 +1013,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
   return kMips64WordSize;
@@ -3031,9 +3016,9 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3515,17 +3500,32 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+    __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
+    __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
+    __ Jalr(T9);
+    __ Nop();
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
@@ -4210,18 +4210,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorMIPS64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e..140ff95 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6ab3aaf..f7ccdd8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1981,9 +1929,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@
   if (invoke->HasPcRelativeDexCache()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
-
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3943,20 +3880,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   locations->SetOut(Location::RegisterLocation(EAX));
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize);
+    __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
+    __ call(Address(temp, code_offset.Int32Value()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
@@ -4273,7 +4223,7 @@
       if (current_method.IsRegister()) {
         method_reg = current_method.AsRegister<Register>();
       } else {
-        DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+        DCHECK(invoke->GetLocations()->Intrinsified());
         DCHECK(!current_method.IsValid());
         method_reg = reg;
         __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
@@ -5063,11 +5013,6 @@
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
-  // This location builder might end up asking to up to four registers, which is
-  // not currently possible for baseline. The situation in which we need four
-  // registers cannot be met by baseline though, because it has not run any
-  // optimization.
-
   Primitive::Type value_type = instruction->GetComponentType();
 
   bool needs_write_barrier =
@@ -6064,7 +6009,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -6295,8 +6240,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6575,6 +6520,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6637,7 +6584,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
@@ -6755,18 +6704,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c65c423..43e9543 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -359,9 +359,7 @@
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -453,7 +451,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -461,7 +459,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 294b40e..2ce2d91 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1002,47 +1002,12 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
 
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
-
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -2161,9 +2126,9 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -2183,9 +2148,9 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3912,21 +3877,33 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
+  } else {
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  }
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
-  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
-                          instruction,
-                          instruction->GetDexPc(),
-                          nullptr);
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
-
-  DCHECK(!codegen_->IsLeafMethod());
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
+    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
+    __ call(Address(temp, code_offset.SizeValue()));
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr);
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+    DCHECK(!codegen_->IsLeafMethod());
+  }
 }
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
@@ -4686,13 +4663,13 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool object_array_set_with_read_barrier =
       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      (may_need_runtime_call || object_array_set_with_read_barrier) ?
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
           LocationSummary::kCallOnSlowPath :
           LocationSummary::kNoCall);
 
@@ -4721,7 +4698,7 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4773,7 +4750,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4782,7 +4759,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4860,7 +4837,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -5649,7 +5626,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5880,8 +5857,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6143,6 +6120,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6205,7 +6184,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+  //   here, which is a no-op thanks to the x86-64 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
@@ -6323,18 +6304,6 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void LocationsBuilderX86_64::VisitFakeString(HFakeString* instruction) {
-  DCHECK(codegen_->IsBaseline());
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
-}
-
-void InstructionCodeGeneratorX86_64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
-  DCHECK(codegen_->IsBaseline());
-  // Will be generated at use site.
-}
-
 // Simple implementation of packed switch - generate cascaded compare/jumps.
 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 505c9dc..82aabb0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -347,8 +347,7 @@
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -401,7 +400,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
                                              Location temp,
@@ -409,7 +408,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d970704..19d63de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -40,6 +40,7 @@
 #include "dex_file.h"
 #include "dex_instruction.h"
 #include "driver/compiler_options.h"
+#include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
@@ -70,8 +71,8 @@
     AddAllocatedRegister(Location::RegisterLocation(arm::R7));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    arm::CodeGeneratorARM::SetupBlockedRegisters();
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
@@ -90,8 +91,8 @@
     AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
-    x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+  void SetupBlockedRegisters() const OVERRIDE {
+    x86::CodeGeneratorX86::SetupBlockedRegisters();
     // ebx is a callee-save register in C, but caller-save for ART.
     blocked_core_registers_[x86::EBX] = true;
     blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -200,259 +201,228 @@
 }
 
 template <typename Expected>
-static void RunCodeBaseline(InstructionSet target_isa,
-                            HGraph* graph,
-                            bool has_result,
-                            Expected expected) {
-  InternalCodeAllocator allocator;
+static void RunCode(CodeGenerator* codegen,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
+  ASSERT_TRUE(graph->IsInSsaForm());
 
-  CompilerOptions compiler_options;
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-  // We avoid doing a stack overflow check that requires the runtime being setup,
-  // by making sure the compiler knows the methods we are running are leaf methods.
-  codegenX86.CompileBaseline(&allocator, true);
-  if (target_isa == kX86) {
-    Run(allocator, codegenX86, has_result, expected);
-  }
+  SSAChecker graph_checker(graph);
+  graph_checker.Run();
+  ASSERT_TRUE(graph_checker.IsValid());
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
-      ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-  codegenARM.CompileBaseline(&allocator, true);
-  if (target_isa == kArm || target_isa == kThumb2) {
-    Run(allocator, codegenARM, has_result, expected);
-  }
-
-  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
-      X86_64InstructionSetFeatures::FromCppDefines());
-  x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-  codegenX86_64.CompileBaseline(&allocator, true);
-  if (target_isa == kX86_64) {
-    Run(allocator, codegenX86_64, has_result, expected);
-  }
-
-  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
-      Arm64InstructionSetFeatures::FromCppDefines());
-  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-  codegenARM64.CompileBaseline(&allocator, true);
-  if (target_isa == kArm64) {
-    Run(allocator, codegenARM64, has_result, expected);
-  }
-
-  std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
-      MipsInstructionSetFeatures::FromCppDefines());
-  mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-  codegenMIPS.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips) {
-    Run(allocator, codegenMIPS, has_result, expected);
-  }
-
-  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
-      Mips64InstructionSetFeatures::FromCppDefines());
-  mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-  codegenMIPS64.CompileBaseline(&allocator, true);
-  if (target_isa == kMips64) {
-    Run(allocator, codegenMIPS64, has_result, expected);
-  }
-}
-
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
-  // Tests may have already computed it.
-  if (graph->GetReversePostOrder().empty()) {
-    graph->BuildDominatorTree();
-  }
   SsaLivenessAnalysis liveness(graph, codegen);
-  liveness.Analyze();
 
-  RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
-  register_allocator.AllocateRegisters();
+  PrepareForRegisterAllocation(graph).Run();
+  liveness.Analyze();
+  RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
-  codegen->CompileOptimized(&allocator);
+  codegen->Compile(&allocator);
   Run(allocator, *codegen, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeOptimized(InstructionSet target_isa,
-                             HGraph* graph,
-                             std::function<void(HGraph*)> hook_before_codegen,
-                             bool has_result,
-                             Expected expected) {
+static void RunCode(InstructionSet target_isa,
+                    HGraph* graph,
+                    std::function<void(HGraph*)> hook_before_codegen,
+                    bool has_result,
+                    Expected expected) {
   CompilerOptions compiler_options;
   if (target_isa == kArm || target_isa == kThumb2) {
     std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
         ArmInstructionSetFeatures::FromCppDefines());
     TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
-    RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kArm64) {
     std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
         Arm64InstructionSetFeatures::FromCppDefines());
     arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
-    RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
-    RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
-    RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
   } else if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
-    RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+    RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
   }
 }
 
-static void TestCode(InstructionSet target_isa,
-                     const uint16_t* data,
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+static void TestCode(const uint16_t* data,
                      bool has_result = false,
                      int32_t expected = 0) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-static void TestCodeLong(InstructionSet target_isa,
-                         const uint16_t* data,
+static void TestCodeLong(const uint16_t* data,
                          bool has_result,
                          int64_t expected) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  HGraph* graph = CreateGraph(&arena);
-  HGraphBuilder builder(graph, Primitive::kPrimLong);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  // Remove suspend checks, they cannot be executed in this context.
-  RemoveSuspendChecks(graph);
-  RunCodeBaseline(target_isa, graph, has_result, expected);
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateGraph(&arena);
+    HGraphBuilder builder(graph, Primitive::kPrimLong);
+    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+    bool graph_built = builder.BuildGraph(*item);
+    ASSERT_TRUE(graph_built);
+    // Remove suspend checks, they cannot be executed in this context.
+    RemoveSuspendChecks(graph);
+    TransformToSsa(graph);
+    RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+  }
 }
 
-class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+class CodegenTest : public CommonCompilerTest {};
 
-TEST_P(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0xFF00);
 
-  TestCode(GetParam(), data1);
+  TestCode(data1);
 
   const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_16, 3,
     Instruction::RETURN_VOID,
     Instruction::GOTO_16, 0xFFFF);
 
-  TestCode(GetParam(), data2);
+  TestCode(data2);
 
   const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 4, 0,
     Instruction::RETURN_VOID,
     Instruction::GOTO_32, 0xFFFF, 0xFFFF);
 
-  TestCode(GetParam(), data3);
+  TestCode(data3);
 }
 
-TEST_P(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0xFE00);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
 
-  TestCode(GetParam(), data);
+  TestCode(data);
 }
 
-TEST_P(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
-TEST_P(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -460,10 +430,10 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -471,12 +441,12 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(GetParam(), data, true, 0);
+  TestCode(data, true, 0);
 }
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) {                        \
+TEST_F(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -485,7 +455,7 @@
       Instruction::NOT_INT | 1 << 8 | 0 << 12 ,         \
       Instruction::RETURN | 1 << 8);                    \
                                                         \
-  TestCode(GetParam(), data, true, EXPECTED_OUTPUT);    \
+  TestCode(data, true, EXPECTED_OUTPUT);                \
 }
 
 NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -501,7 +471,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST_P(CodegenTest, TEST_NAME) {                                         \
+TEST_F(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -512,7 +482,7 @@
       Instruction::NOT_LONG | 2 << 8 | 0 << 12,                          \
       Instruction::RETURN_WIDE | 2 << 8);                                \
                                                                          \
-  TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT);                 \
+  TestCodeLong(data, true, EXPECTED_OUTPUT);                             \
 }
 
 NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -551,7 +521,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST_P(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -565,192 +535,146 @@
       Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
       Instruction::RETURN_WIDE | 2 << 8);
 
-  TestCodeLong(GetParam(), data, true, 1);
+  TestCodeLong(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 7);
+  TestCode(data, true, 7);
 }
 
-TEST_P(CodegenTest, NonMaterializedCondition) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  entry->AddInstruction(new (&allocator) HGoto());
-
-  HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(first_block);
-  entry->AddSuccessor(first_block);
-  HIntConstant* constant0 = graph->GetIntConstant(0);
-  HIntConstant* constant1 = graph->GetIntConstant(1);
-  HEqual* equal = new (&allocator) HEqual(constant0, constant0);
-  first_block->AddInstruction(equal);
-  first_block->AddInstruction(new (&allocator) HIf(equal));
-
-  HBasicBlock* then = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
-  graph->AddBlock(then);
-  graph->AddBlock(else_);
-  graph->AddBlock(exit);
-  first_block->AddSuccessor(then);
-  first_block->AddSuccessor(else_);
-  then->AddSuccessor(exit);
-  else_->AddSuccessor(exit);
-
-  exit->AddInstruction(new (&allocator) HExit());
-  then->AddInstruction(new (&allocator) HReturn(constant0));
-  else_->AddInstruction(new (&allocator) HReturn(constant1));
-
-  ASSERT_TRUE(equal->NeedsMaterialization());
-  graph->BuildDominatorTree();
-  PrepareForRegisterAllocation(graph).Run();
-  ASSERT_FALSE(equal->NeedsMaterialization());
-
-  auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-    HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-    block->InsertInstructionBefore(move, block->GetLastInstruction());
-  };
-
-  RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
-}
-
-TEST_P(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
-    Instruction::CONST_4 | 3 << 12 | 0 << 8,
-    Instruction::CONST_4 | 0 << 12 | 1 << 8,
-    Instruction::CONST_4 | 4 << 12 | 2 << 8,
-    Instruction::CONST_4 | 0 << 12 | 3 << 8,
+    Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+    Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(GetParam(), data, true, 12);
+  TestCodeLong(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 12);
+  TestCode(data, true, 12);
 }
 
-TEST_P(CodegenTest, MaterializedCondition1) {
-  // Check that condition are materialized correctly. A materialized condition
-  // should yield `1` if it evaluated to true, and `0` otherwise.
-  // We force the materialization of comparisons for different combinations of
-  // inputs and check the results.
-
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
-
-  for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
+
     HGraph* graph = CreateGraph(&allocator);
+    HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry);
+    graph->SetEntryBlock(entry);
+    entry->AddInstruction(new (&allocator) HGoto());
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
-    HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(code_block);
+    HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(first_block);
+    entry->AddSuccessor(first_block);
+    HIntConstant* constant0 = graph->GetIntConstant(0);
+    HIntConstant* constant1 = graph->GetIntConstant(1);
+    HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+    first_block->AddInstruction(equal);
+    first_block->AddInstruction(new (&allocator) HIf(equal));
+
+    HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+    HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
     HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
-
-    entry_block->AddSuccessor(code_block);
-    code_block->AddSuccessor(exit_block);
     graph->SetExitBlock(exit_block);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    code_block->AddInstruction(&cmp_lt);
-    HReturn ret(&cmp_lt);
-    code_block->AddInstruction(&ret);
+    graph->AddBlock(then_block);
+    graph->AddBlock(else_block);
+    graph->AddBlock(exit_block);
+    first_block->AddSuccessor(then_block);
+    first_block->AddSuccessor(else_block);
+    then_block->AddSuccessor(exit_block);
+    else_block->AddSuccessor(exit_block);
+
+    exit_block->AddInstruction(new (&allocator) HExit());
+    then_block->AddInstruction(new (&allocator) HReturn(constant0));
+    else_block->AddInstruction(new (&allocator) HReturn(constant1));
+
+    ASSERT_TRUE(equal->NeedsMaterialization());
+    TransformToSsa(graph);
+    PrepareForRegisterAllocation(graph).Run();
+    ASSERT_FALSE(equal->NeedsMaterialization());
 
     auto hook_before_codegen = [](HGraph* graph_in) {
       HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -758,93 +682,143 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCode(target_isa, graph, hook_before_codegen, true, 0);
   }
 }
 
-TEST_P(CodegenTest, MaterializedCondition2) {
-  // Check that HIf correctly interprets a materialized condition.
-  // We force the materialization of comparisons for different combinations of
-  // inputs. An HIf takes the materialized combination as input and returns a
-  // value that we verify.
+TEST_F(CodegenTest, MaterializedCondition1) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+    // should yield `1` if it evaluated to true, and `0` otherwise.
+    // We force the materialization of comparisons for different combinations of
 
-  int lhs[] = {1, 2, -1, 2, 0xabc};
-  int rhs[] = {2, 1, 2, -1, 0xabc};
+    // inputs and check the results.
 
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
 
-  for (size_t i = 0; i < arraysize(lhs); i++) {
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    HGraph* graph = CreateGraph(&allocator);
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
 
-    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(entry_block);
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+      HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(code_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
 
-    HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_block);
-    HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_true_block);
-    HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(if_false_block);
-    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
-    graph->AddBlock(exit_block);
-    exit_block->AddInstruction(new (&allocator) HExit());
+      entry_block->AddSuccessor(code_block);
+      code_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
 
-    graph->SetEntryBlock(entry_block);
-    entry_block->AddSuccessor(if_block);
-    if_block->AddSuccessor(if_true_block);
-    if_block->AddSuccessor(if_false_block);
-    if_true_block->AddSuccessor(exit_block);
-    if_false_block->AddSuccessor(exit_block);
-    graph->SetExitBlock(exit_block);
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      code_block->AddInstruction(&cmp_lt);
+      HReturn ret(&cmp_lt);
+      code_block->AddInstruction(&ret);
 
-    HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
-    HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
-    HLessThan cmp_lt(cst_lhs, cst_rhs);
-    if_block->AddInstruction(&cmp_lt);
-    // We insert a temporary to separate the HIf from the HLessThan and force
-    // the materialization of the condition.
-    HTemporary force_materialization(0);
-    if_block->AddInstruction(&force_materialization);
-    HIf if_lt(&cmp_lt);
-    if_block->AddInstruction(&if_lt);
-
-    HIntConstant* cst_lt = graph->GetIntConstant(1);
-    HReturn ret_lt(cst_lt);
-    if_true_block->AddInstruction(&ret_lt);
-    HIntConstant* cst_ge = graph->GetIntConstant(0);
-    HReturn ret_ge(cst_ge);
-    if_false_block->AddInstruction(&ret_ge);
-
-    auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
-      HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
-      block->InsertInstructionBefore(move, block->GetLastInstruction());
-    };
-
-    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
   }
 }
 
-TEST_P(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, MaterializedCondition2) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that HIf correctly interprets a materialized condition.
+    // We force the materialization of comparisons for different combinations of
+    // inputs. An HIf takes the materialized combination as input and returns a
+    // value that we verify.
+
+    int lhs[] = {1, 2, -1, 2, 0xabc};
+    int rhs[] = {2, 1, 2, -1, 0xabc};
+
+
+    for (size_t i = 0; i < arraysize(lhs); i++) {
+      ArenaPool pool;
+      ArenaAllocator allocator(&pool);
+      HGraph* graph = CreateGraph(&allocator);
+
+      HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(entry_block);
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddInstruction(new (&allocator) HGoto());
+
+      HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_block);
+      HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_true_block);
+      HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(if_false_block);
+      HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+      graph->AddBlock(exit_block);
+      exit_block->AddInstruction(new (&allocator) HExit());
+
+      graph->SetEntryBlock(entry_block);
+      entry_block->AddSuccessor(if_block);
+      if_block->AddSuccessor(if_true_block);
+      if_block->AddSuccessor(if_false_block);
+      if_true_block->AddSuccessor(exit_block);
+      if_false_block->AddSuccessor(exit_block);
+      graph->SetExitBlock(exit_block);
+
+      HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+      HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+      HLessThan cmp_lt(cst_lhs, cst_rhs);
+      if_block->AddInstruction(&cmp_lt);
+      // We insert a temporary to separate the HIf from the HLessThan and force
+      // the materialization of the condition.
+      HTemporary force_materialization(0);
+      if_block->AddInstruction(&force_materialization);
+      HIf if_lt(&cmp_lt);
+      if_block->AddInstruction(&if_lt);
+
+      HIntConstant* cst_lt = graph->GetIntConstant(1);
+      HReturn ret_lt(cst_lt);
+      if_true_block->AddInstruction(&ret_lt);
+      HIntConstant* cst_ge = graph->GetIntConstant(0);
+      HReturn ret_ge(cst_ge);
+      if_false_block->AddInstruction(&ret_ge);
+
+      TransformToSsa(graph);
+      auto hook_before_codegen = [](HGraph* graph_in) {
+        HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+        HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+        block->InsertInstructionBefore(move, block->GetLastInstruction());
+      };
+      RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    }
+  }
+}
+
+TEST_F(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 1);
+  TestCode(data, true, 1);
 }
 
-TEST_P(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
     Instruction::DIV_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(GetParam(), data, true, 2);
+  TestCode(data, true, 2);
 }
 
 // Helper method.
@@ -933,80 +907,55 @@
   block->AddInstruction(comparison);
   block->AddInstruction(new (&allocator) HReturn(comparison));
 
-  auto hook_before_codegen = [](HGraph*) {
-  };
-  RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
+  TransformToSsa(graph);
+  RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
 }
 
-TEST_P(CodegenTest, ComparisonsInt) {
-  const InstructionSet target_isa = GetParam();
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+TEST_F(CodegenTest, ComparisonsInt) {
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+      }
     }
   }
 }
 
-TEST_P(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
-  const InstructionSet target_isa = GetParam();
-  if (target_isa == kMips || target_isa == kMips64) {
-    return;
-  }
+  for (InstructionSet target_isa : GetTargetISAs()) {
+    if (target_isa == kMips || target_isa == kMips64) {
+      continue;
+    }
 
-  for (int64_t i = -1; i <= 1; i++) {
-    for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+    for (int64_t i = -1; i <= 1; i++) {
+      for (int64_t j = -1; j <= 1; j++) {
+        TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+        TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+      }
     }
   }
 }
 
-static ::std::vector<InstructionSet> GetTargetISAs() {
-  ::std::vector<InstructionSet> v;
-  // Add all ISAs that are executable on hardware or on simulator.
-  const ::std::vector<InstructionSet> executable_isa_candidates = {
-    kArm,
-    kArm64,
-    kThumb2,
-    kX86,
-    kX86_64,
-    kMips,
-    kMips64
-  };
-
-  for (auto target_isa : executable_isa_candidates) {
-    if (CanExecute(target_isa)) {
-      v.push_back(target_isa);
-    }
-  }
-
-  return v;
-}
-
-INSTANTIATE_TEST_CASE_P(MultipleTargets,
-                        CodegenTest,
-                        ::testing::ValuesIn(GetTargetISAs()));
-
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 86a695b..e170e37 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -89,15 +89,18 @@
 }
 
 void HDeadCodeElimination::RemoveDeadBlocks() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not eliminate dead blocks if the graph has irreducible loops. We could
+    // support it, but that would require changes in our loop representation to handle
+    // multiple entry points. We decided it was not worth the complexity.
+    return;
+  }
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
 
   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
-  // If the graph has irreducible loops we need to reset all graph analysis we have done
-  // before: the irreducible loop can be turned into a reducible one.
-  // For simplicity, we do the full computation regardless of the type of the loops.
   bool rerun_dominance_and_loop_analysis = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
@@ -105,9 +108,6 @@
   // inside out.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
-    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
-      rerun_dominance_and_loop_analysis = true;
-    }
     int id = block->GetBlockId();
     if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index d60f3e3..3113677 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -484,6 +484,18 @@
         loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
+  if (loop_information->GetSuspendCheck() == nullptr) {
+    AddError(StringPrintf(
+        "Loop with header %d does not have a suspend check.",
+        loop_header->GetBlockId()));
+  }
+
+  if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+    AddError(StringPrintf(
+        "Loop header %d does not have the loop suspend check as the first instruction.",
+        loop_header->GetBlockId()));
+  }
+
   // Ensure the loop header has only one incoming branch and the remaining
   // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().size();
@@ -589,6 +601,14 @@
     }
   }
 
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
   // Ensure an instruction having an environment is dominated by the
   // instructions contained in the environment.
   for (HEnvironment* environment = instruction->GetEnvironment();
@@ -618,6 +638,34 @@
                             instruction->GetId()));
     }
   }
+
+  if (instruction->HasEnvironment() && instruction->CanThrowIntoCatchBlock()) {
+    // Guarded on HasEnvironment() because the walk below dereferences the environment.
+    // The top-level one is the catch block's, since methods with try/catch are not inlined.
+    HEnvironment* environment = instruction->GetEnvironment();
+    while (environment->GetParent() != nullptr) {
+      environment = environment->GetParent();
+    }
+
+    // Find all catch blocks and test that `instruction` has an environment
+    // value for each one.
+    const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+    for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+      for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* catch_phi = phi_it.Current()->AsPhi();
+        if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
+          AddError(StringPrintf("Instruction %s:%d throws into catch block %d "
+                                "with catch phi %d for vreg %d but its "
+                                "corresponding environment slot is empty.",
+                                instruction->DebugName(),
+                                instruction->GetId(),
+                                catch_block->GetBlockId(),
+                                catch_phi->GetId(),
+                                catch_phi->GetRegNumber()));
+        }
+      }
+    }
+  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 32c3a92..6b8f61a 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -21,6 +21,7 @@
 #include <cctype>
 #include <sstream>
 
+#include "bounds_check_elimination.h"
 #include "code_generator.h"
 #include "dead_code_elimination.h"
 #include "disassembler.h"
@@ -426,6 +427,12 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+  void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();  // Op kind as the "kind" attribute.
+  }
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
@@ -433,10 +440,6 @@
       StartAttributeStream("shift") << instruction->GetShiftAmount();
     }
   }
-
-  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
-    StartAttributeStream("kind") << instruction->GetOpKind();
-  }
 #endif
 
   bool IsPass(const char* name) {
@@ -505,6 +508,7 @@
     if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
         || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
         || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
+        || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
         || IsPass(SsaBuilder::kSsaBuilderPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 293282e..20c4f1f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -356,12 +356,12 @@
       compare, invoke_instruction->GetDexPc());
   // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(load_class, cursor);
+    bb_cursor->InsertInstructionAfter(field_get, cursor);
   } else {
-    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+    bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(field_get, load_class);
-  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(load_class, field_get);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
   bb_cursor->InsertInstructionAfter(deoptimize, compare);
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b90afb1..49fc8c7 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -77,7 +77,6 @@
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
-  void VisitFakeString(HFakeString* fake_string) OVERRIDE;
   void VisitInvoke(HInvoke* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
@@ -1179,48 +1178,6 @@
   TryReplaceWithRotate(instruction);
 }
 
-void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
-  HInstruction* actual_string = nullptr;
-
-  // Find the string we need to replace this instruction with. The actual string is
-  // the return value of a StringFactory call.
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* use = it.Current()->GetUser();
-    if (use->IsInvokeStaticOrDirect()
-        && use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction)) {
-      use->AsInvokeStaticOrDirect()->RemoveFakeStringArgumentAsLastInput();
-      actual_string = use;
-      break;
-    }
-  }
-
-  // Check that there is no other instruction that thinks it is the factory for that string.
-  if (kIsDebugBuild) {
-    CHECK(actual_string != nullptr);
-    for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-      HInstruction* use = it.Current()->GetUser();
-      if (use->IsInvokeStaticOrDirect()) {
-        CHECK(!use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction));
-      }
-    }
-  }
-
-  // We need to remove any environment uses of the fake string that are not dominated by
-  // `actual_string` to null.
-  for (HUseIterator<HEnvironment*> it(instruction->GetEnvUses()); !it.Done(); it.Advance()) {
-    HEnvironment* environment = it.Current()->GetUser();
-    if (!actual_string->StrictlyDominates(environment->GetHolder())) {
-      environment->RemoveAsUserOfInput(it.Current()->GetIndex());
-      environment->SetRawEnvAt(it.Current()->GetIndex(), nullptr);
-    }
-  }
-
-  // Only uses dominated by `actual_string` must remain. We can safely replace and remove
-  // `instruction`.
-  instruction->ReplaceWith(actual_string);
-  instruction->GetBlock()->RemoveInstruction(instruction);
-}
-
 void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
   HInstruction* argument = instruction->InputAt(1);
   HInstruction* receiver = instruction->InputAt(0);
diff --git a/test/127-secondarydex/src/Test.java b/compiler/optimizing/instruction_simplifier_arm.cc
similarity index 60%
copy from test/127-secondarydex/src/Test.java
copy to compiler/optimizing/instruction_simplifier_arm.cc
index 8547e79..db1f9a7 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,17 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
+#include "instruction_simplifier_arm.h"
+#include "instruction_simplifier_shared.h"
 
-    private void print() {
-        System.out.println("Test");
-    }
+namespace art {
+namespace arm {
 
-    public String toString() {
-        return new String("Test");
-    }
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+  if (TryCombineMultiplyAccumulate(instruction, kArm)) {  // Shared helper, asked for kArm.
+    RecordSimplification();  // Reached only when the helper reported a rewrite.
+  }
 }
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
new file mode 100644
index 0000000..379b95d
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class InstructionSimplifierArmVisitor : public HGraphVisitor {  // ARM-only simplifications.
+ public:
+  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+  void RecordSimplification() {  // Counts one arch-specific simplification, if stats exist.
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  void VisitMul(HMul* instruction) OVERRIDE;  // Tries to form a multiply-accumulate.
+
+  OptimizingCompilerStats* stats_;  // May be null (checked before use).
+};
+
+
+class InstructionSimplifierArm : public HOptimization {  // Pass wrapping the ARM visitor.
+ public:
+  InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+
+  void Run() OVERRIDE {  // Runs the visitor in reverse post order.
+    InstructionSimplifierArmVisitor visitor(graph_, stats_);
+    visitor.VisitReversePostOrder();
+  }
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6bbc751..83126a5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -17,6 +17,7 @@
 #include "instruction_simplifier_arm64.h"
 
 #include "common_arm64.h"
+#include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
 
 namespace art {
@@ -30,6 +31,15 @@
                                                                      HInstruction* array,
                                                                      HInstruction* index,
                                                                      int access_size) {
+  if (kEmitCompilerReadBarrier) {
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    //
+    // TODO: Handle this case properly in the ARM64 code generator and
+    // re-enable this optimization; otherwise, remove this TODO.
+    // b/26601270
+    return;
+  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -170,67 +180,6 @@
   return true;
 }
 
-bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
-    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
-  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
-  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
-  DCHECK_NE(input_binop, input_other);
-  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
-    return false;
-  }
-
-  // Try to interpret patterns like
-  //    a * (b <+/-> 1)
-  // as
-  //    (a * b) <+/-> a
-  HInstruction* input_a = input_other;
-  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
-  HInstruction::InstructionKind op_kind;
-
-  if (input_binop->IsAdd()) {
-    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
-      // Interpret
-      //    a * (b + 1)
-      // as
-      //    (a * b) + a
-      input_b = input_binop->GetLeastConstantLeft();
-      op_kind = HInstruction::kAdd;
-    }
-  } else {
-    DCHECK(input_binop->IsSub());
-    if (input_binop->GetRight()->IsConstant() &&
-        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
-      // Interpret
-      //    a * (b - (-1))
-      // as
-      //    a + (a * b)
-      input_b = input_binop->GetLeft();
-      op_kind = HInstruction::kAdd;
-    } else if (input_binop->GetLeft()->IsConstant() &&
-               input_binop->GetLeft()->AsConstant()->IsOne()) {
-      // Interpret
-      //    a * (1 - b)
-      // as
-      //    a - (a * b)
-      input_b = input_binop->GetRight();
-      op_kind = HInstruction::kSub;
-    }
-  }
-
-  if (input_b == nullptr) {
-    // We did not find a pattern we can optimize.
-    return false;
-  }
-
-  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
-      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
-
-  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
-  input_binop->GetBlock()->RemoveInstruction(input_binop);
-
-  return false;
-}
-
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -246,75 +195,8 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
-  Primitive::Type type = instruction->GetType();
-  if (!Primitive::IsIntOrLongType(type)) {
-    return;
-  }
-
-  HInstruction* use = instruction->HasNonEnvironmentUses()
-      ? instruction->GetUses().GetFirst()->GetUser()
-      : nullptr;
-
-  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
-    // Replace code looking like
-    //    MUL tmp, x, y
-    //    SUB dst, acc, tmp
-    // with
-    //    MULSUB dst, acc, x, y
-    // Note that we do not want to (unconditionally) perform the merge when the
-    // multiplication has multiple uses and it can be merged in all of them.
-    // Multiple uses could happen on the same control-flow path, and we would
-    // then increase the amount of work. In the future we could try to evaluate
-    // whether all uses are on different control-flow paths (using dominance and
-    // reverse-dominance information) and only perform the merge when they are.
-    HInstruction* accumulator = nullptr;
-    HBinaryOperation* binop = use->AsBinaryOperation();
-    HInstruction* binop_left = binop->GetLeft();
-    HInstruction* binop_right = binop->GetRight();
-    // Be careful after GVN. This should not happen since the `HMul` has only
-    // one use.
-    DCHECK_NE(binop_left, binop_right);
-    if (binop_right == instruction) {
-      accumulator = binop_left;
-    } else if (use->IsAdd()) {
-      DCHECK_EQ(binop_left, instruction);
-      accumulator = binop_right;
-    }
-
-    if (accumulator != nullptr) {
-      HArm64MultiplyAccumulate* mulacc =
-          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
-                                                                binop->GetKind(),
-                                                                accumulator,
-                                                                instruction->GetLeft(),
-                                                                instruction->GetRight());
-
-      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-      DCHECK(!instruction->HasUses());
-      instruction->GetBlock()->RemoveInstruction(instruction);
-      RecordSimplification();
-      return;
-    }
-  }
-
-  // Use multiply accumulate instruction for a few simple patterns.
-  // We prefer not applying the following transformations if the left and
-  // right inputs perform the same operation.
-  // We rely on GVN having squashed the inputs if appropriate. However the
-  // results are still correct even if that did not happen.
-  if (instruction->GetLeft() == instruction->GetRight()) {
-    return;
-  }
-
-  HInstruction* left = instruction->GetLeft();
-  HInstruction* right = instruction->GetRight();
-  if ((right->IsAdd() || right->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
-    return;
-  }
-  if ((left->IsAdd() || left->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
-    return;
+  if (TryCombineMultiplyAccumulate(instruction, kArm64)) {
+    RecordSimplification();
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index b7f490b..37a34c0 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,10 +51,6 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
-  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
-                                           HBinaryOperation* input_binop,
-                                           HInstruction* input_other);
-
   // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
new file mode 100644
index 0000000..45d196f
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+
+namespace {
+
+bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,  // Returns true iff `mul` was replaced.
+                                         HBinaryOperation* input_binop,
+                                         HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {  // It is removed below on success.
+    return false;
+  }
+
+  // Try to interpret patterns like (`a` is `input_other`, the add/sub is `input_binop`)
+  //    a * (b <+/-> 1)
+  // as the multiply-accumulate
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;  // Only read once `input_b` has been set.
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+  HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());  // a <op> (a * b)
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
+}  // namespace
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {  // True iff `mul` was fused.
+  Primitive::Type type = mul->GetType();
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      if (type != Primitive::kPrimInt) {  // kArm/kThumb2: only int multiplies are combined.
+        return false;
+      }
+      break;
+    case kArm64:
+      if (!Primitive::IsIntOrLongType(type)) {  // kArm64: int and long multiplies.
+        return false;
+      }
+      break;
+    default:  // Any other ISA: never combine.
+      return false;
+  }
+
+  HInstruction* use = mul->HasNonEnvironmentUses()
+      ? mul->GetUses().GetFirst()->GetUser()
+      : nullptr;  // Null when `mul` has no non-environment use.
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {  // `use` is that single user.
+    if (use->IsAdd() || use->IsSub()) {
+      // Replace code looking like
+      //    MUL tmp, x, y
+      //    SUB dst, acc, tmp
+      // with
+      //    MULSUB dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when the
+      // multiplication has multiple uses and it can be merged in all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HBinaryOperation* binop = use->AsBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // Both operands being `mul` (possible after GVN) would make two uses; the
+      // single-use check above excludes it.
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {
+        accumulator = binop_left;
+      } else if (use->IsAdd()) {  // `mul - acc` is left alone: no accumulator is picked.
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }
+
+      if (accumulator != nullptr) {
+        HMultiplyAccumulate* mulacc =
+            new (arena) HMultiplyAccumulate(type,
+                                            binop->GetKind(),
+                                            accumulator,
+                                            mul->GetLeft(),
+                                            mul->GetRight());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());
+        mul->GetBlock()->RemoveInstruction(mul);
+        return true;
+      }
+    } else if (use->IsNeg() && isa != kArm) {  // -(x * y) => 0 - (x * y); never for kArm.
+      HMultiplyAccumulate* mulacc =
+          new (arena) HMultiplyAccumulate(type,
+                                          HInstruction::kSub,
+                                          mul->GetBlock()->GetGraph()->GetConstant(type, 0),
+                                          mul->GetLeft(),
+                                          mul->GetRight());
+
+      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
+      DCHECK(!mul->HasUses());
+      mul->GetBlock()->RemoveInstruction(mul);
+      return true;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (mul->GetLeft() == mul->GetRight()) {
+    return false;
+  }
+
+  HInstruction* left = mul->GetLeft();
+  HInstruction* right = mul->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
+    return true;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
+    return true;
+  }
+  return false;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
new file mode 100644
index 0000000..9832ecc
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3..5caf077 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -176,6 +176,16 @@
       }
 
     // Misc data processing.
+    case kIntrinsicBitCount:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerBitCount;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongBitCount;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 9f50d18..3bf3f7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -85,9 +85,9 @@
                             InvokeDexCallingConventionVisitor* calling_convention_visitor) {
     if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
       HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
-      // When we do not run baseline, explicit clinit checks triggered by static
-      // invokes must have been pruned by art::PrepareForRegisterAllocation.
-      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+      // Explicit clinit checks triggered by static invokes must have been
+      // pruned by art::PrepareForRegisterAllocation.
+      DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
     }
 
     if (invoke->GetNumberOfArguments() == 0) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28..e72f927 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1577,10 +1577,12 @@
 void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..c5688a3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1447,8 +1447,10 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546..ea38034 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -28,12 +28,14 @@
   V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a27..81112b1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7..ac969e3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1724,6 +1724,9 @@
 void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9..e48bed5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ popcntl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ popcntl(out, Address(ESP, src.GetStackIndex()));
+    }
+    return;
+  }
+
+  // The 64-bit case needs to worry about both parts of the register.
+  DCHECK(src.IsRegisterPair());
+  Register src_lo = src.AsRegisterPairLow<Register>();
+  Register src_hi = src.AsRegisterPairHigh<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  __ popcntl(temp, src_lo);
+  __ popcntl(out, src_hi);
+  __ addl(out, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d..23a628f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ popcntq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ popcntl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b80c6bd..cb7bc58 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -167,11 +167,7 @@
 void HGraph::ClearLoopInformation() {
   SetHasIrreducibleLoops(false);
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsLoopHeader()) {
-      current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
-    }
-    current->SetLoopInformation(nullptr);
+    it.Current()->SetLoopInformation(nullptr);
   }
 }
 
@@ -180,6 +176,14 @@
   dominator_ = nullptr;
 }
 
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+  HInstruction* instruction = GetFirstInstruction();
+  while (instruction->IsParallelMove()) {
+    instruction = instruction->GetNext();
+  }
+  return instruction;
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
@@ -457,6 +461,10 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
+    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
+      // a loop got dismantled. Just remove the suspend check.
+      block->RemoveInstruction(block->GetFirstInstruction());
     }
   }
 }
@@ -2200,10 +2208,12 @@
     SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
   }
   // Adjust method's exception status from intrinsic table.
-  switch (exceptions) {
-    case kNoThrow: SetCanThrow(false); break;
-    case kCanThrow: SetCanThrow(true); break;
-  }
+  SetCanThrow(exceptions == kCanThrow);
+}
+
+bool HNewInstance::IsStringAlloc() const {
+  ScopedObjectAccess soa(Thread::Current());
+  return GetReferenceTypeInfo().IsStringClass();
 }
 
 bool HInvoke::NeedsEnvironment() const {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2313230..57fa558 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -44,7 +44,6 @@
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
-class HFakeString;
 class HFloatConstant;
 class HGraphBuilder;
 class HGraphVisitor;
@@ -861,6 +860,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  HInstruction* GetFirstInstructionDisregardMoves() const;
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.push_back(block);
     block->predecessors_.push_back(this);
@@ -1166,7 +1167,6 @@
   M(DoubleConstant, Constant)                                           \
   M(Equal, Condition)                                                   \
   M(Exit, Instruction)                                                  \
-  M(FakeString, Instruction)                                            \
   M(FloatConstant, Constant)                                            \
   M(Goto, Instruction)                                                  \
   M(GreaterThan, Condition)                                             \
@@ -1226,6 +1226,16 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+/*
+ * Instructions, shared across several (not all) architectures.
+ */
+#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
+  M(MultiplyAccumulate, Instruction)
+#endif
+
 #ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
 #else
@@ -1238,8 +1248,7 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
-  M(Arm64IntermediateAddress, Instruction)                              \
-  M(Arm64MultiplyAccumulate, Instruction)
+  M(Arm64IntermediateAddress, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1259,6 +1268,7 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                               \
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
@@ -3265,6 +3275,61 @@
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
+class HNewInstance : public HExpression<2> {
+ public:
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
+               uint32_t dex_pc,
+               uint16_t type_index,
+               const DexFile& dex_file,
+               bool can_throw,
+               bool finalizable,
+               QuickEntrypointEnum entrypoint)
+      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
+        type_index_(type_index),
+        dex_file_(dex_file),
+        can_throw_(can_throw),
+        finalizable_(finalizable),
+        entrypoint_(entrypoint) {
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
+  }
+
+  uint16_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  // Calls runtime so needs an environment.
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+
+  // It may throw when called on type that's not instantiable/accessible.
+  // It can throw OOME.
+  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
+  bool CanThrow() const OVERRIDE { return can_throw_ || true; }
+
+  bool IsFinalizable() const { return finalizable_; }
+
+  bool CanBeNull() const OVERRIDE { return false; }
+
+  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
+
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
+  bool IsStringAlloc() const;
+
+  DECLARE_INSTRUCTION(NewInstance);
+
+ private:
+  const uint16_t type_index_;
+  const DexFile& dex_file_;
+  const bool can_throw_;
+  const bool finalizable_;
+  QuickEntrypointEnum entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
+};
+
 enum class Intrinsics {
 #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   k ## Name,
@@ -3558,10 +3623,9 @@
 
   // Get the index of the special input, if any.
   //
-  // If the invoke IsStringInit(), it initially has a HFakeString special argument
-  // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
-  // the "special input" is the current method pointer; otherwise there may be one
-  // platform-specific special input, such as PC-relative addressing base.
+  // If the invoke HasCurrentMethodInput(), the "special input" is the current
+  // method pointer; otherwise there may be one platform-specific special input,
+  // such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
 
   InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
@@ -3635,21 +3699,13 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  bool IsStringFactoryFor(HFakeString* str) const {
-    if (!IsStringInit()) return false;
-    DCHECK(!HasCurrentMethodInput());
-    if (InputCount() == (number_of_arguments_)) return false;
-    return InputAt(InputCount() - 1)->AsFakeString() == str;
-  }
-
-  void RemoveFakeStringArgumentAsLastInput() {
+  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
     DCHECK(IsStringInit());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
-    DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsFakeString()) << last_input->DebugName();
-    RemoveAsUserOfInput(last_input_index);
+    size_t index = InputCount() - 1;
+    HInstruction* input = InputAt(index);
+    RemoveAsUserOfInput(index);
     inputs_.pop_back();
+    return input;
   }
 
   // Is this a call to a static method whose declaring class has an
@@ -3756,59 +3812,6 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<2> {
- public:
-  HNewInstance(HInstruction* cls,
-               HCurrentMethod* current_method,
-               uint32_t dex_pc,
-               uint16_t type_index,
-               const DexFile& dex_file,
-               bool can_throw,
-               bool finalizable,
-               QuickEntrypointEnum entrypoint)
-      : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
-        type_index_(type_index),
-        dex_file_(dex_file),
-        can_throw_(can_throw),
-        finalizable_(finalizable),
-        entrypoint_(entrypoint) {
-    SetRawInputAt(0, cls);
-    SetRawInputAt(1, current_method);
-  }
-
-  uint16_t GetTypeIndex() const { return type_index_; }
-  const DexFile& GetDexFile() const { return dex_file_; }
-
-  // Calls runtime so needs an environment.
-  bool NeedsEnvironment() const OVERRIDE { return true; }
-
-  // It may throw when called on type that's not instantiable/accessible.
-  // It can throw OOME.
-  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
-  bool CanThrow() const OVERRIDE { return can_throw_ || true; }
-
-  bool IsFinalizable() const { return finalizable_; }
-
-  bool CanBeNull() const OVERRIDE { return false; }
-
-  QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
-
-  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
-    entrypoint_ = entrypoint;
-  }
-
-  DECLARE_INSTRUCTION(NewInstance);
-
- private:
-  const uint16_t type_index_;
-  const DexFile& dex_file_;
-  const bool can_throw_;
-  const bool finalizable_;
-  QuickEntrypointEnum entrypoint_;
-
-  DISALLOW_COPY_AND_ASSIGN(HNewInstance);
-};
-
 class HNeg : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
@@ -5589,26 +5592,6 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-/**
- * A HInstruction used as a marker for the replacement of new + <init>
- * of a String to a call to a StringFactory. Only baseline will see
- * the node at code generation, where it will be be treated as null.
- * When compiling non-baseline, `HFakeString` instructions are being removed
- * in the instruction simplifier.
- */
-class HFakeString : public HTemplateInstruction<0> {
- public:
-  explicit HFakeString(uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {}
-
-  Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimNot; }
-
-  DECLARE_INSTRUCTION(FakeString);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HFakeString);
-};
-
 class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
  public:
   MoveOperands(Location source,
@@ -5745,6 +5728,9 @@
 
 }  // namespace art
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+#include "nodes_shared.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "nodes_arm.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 445cdab..173852a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,40 +118,6 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
-class HArm64MultiplyAccumulate : public HExpression<3> {
- public:
-  HArm64MultiplyAccumulate(Primitive::Type type,
-                           InstructionKind op,
-                           HInstruction* accumulator,
-                           HInstruction* mul_left,
-                           HInstruction* mul_right,
-                           uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
-    SetRawInputAt(kInputAccumulatorIndex, accumulator);
-    SetRawInputAt(kInputMulLeftIndex, mul_left);
-    SetRawInputAt(kInputMulRightIndex, mul_right);
-  }
-
-  static constexpr int kInputAccumulatorIndex = 0;
-  static constexpr int kInputMulLeftIndex = 1;
-  static constexpr int kInputMulRightIndex = 2;
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
-  }
-
-  InstructionKind GetOpKind() const { return op_kind_; }
-
-  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
-
- private:
-  // Indicates if this is a MADD or MSUB.
-  InstructionKind op_kind_;
-
-  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
new file mode 100644
index 0000000..b04b622
--- /dev/null
+++ b/compiler/optimizing/nodes_shared.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+
+namespace art {
+
+class HMultiplyAccumulate : public HExpression<3> {
+ public:
+  HMultiplyAccumulate(Primitive::Type type,
+                      InstructionKind op,
+                      HInstruction* accumulator,
+                      HInstruction* mul_left,
+                      HInstruction* mul_right,
+                      uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bb840ea..4da48bd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -62,6 +62,7 @@
 #include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "instruction_simplifier_arm.h"
 #include "intrinsics.h"
 #include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
@@ -127,7 +128,7 @@
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
-        visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+        visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
@@ -305,30 +306,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Whether we should run any optimization or register allocation. If false, will
-  // just run the code generation after the graph was built.
-  const bool run_optimizations_;
-
   // Create a 'CompiledMethod' for an optimized graph.
-  CompiledMethod* EmitOptimized(ArenaAllocator* arena,
-                                CodeVectorAllocator* code_allocator,
-                                CodeGenerator* codegen,
-                                CompilerDriver* driver) const;
-
-  // Create a 'CompiledMethod' for a non-optimized graph.
-  CompiledMethod* EmitBaseline(ArenaAllocator* arena,
-                               CodeVectorAllocator* code_allocator,
-                               CodeGenerator* codegen,
-                               CompilerDriver* driver) const;
+  CompiledMethod* Emit(ArenaAllocator* arena,
+                       CodeVectorAllocator* code_allocator,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
   // This method:
   // 1) Builds the graph. Returns null if it failed to build it.
-  // 2) If `run_optimizations_` is set:
-  //    2.1) Transform the graph to SSA. Returns null if it failed.
-  //    2.2) Run optimizations on the graph, including register allocator.
-  // 3) Generate code with the `code_allocator` provided.
+  // 2) Transforms the graph to SSA. Returns null if it failed.
+  // 3) Runs optimizations on the graph, including register allocator.
+  // 4) Generates code with the `code_allocator` provided.
   CodeGenerator* TryCompile(ArenaAllocator* arena,
                             CodeVectorAllocator* code_allocator,
                             const DexFile::CodeItem* code_item,
@@ -350,21 +340,19 @@
 static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
 
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
-    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
-      run_optimizations_(
-          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
 
 void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
   if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
     std::ios_base::openmode cfg_file_mode =
-        driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+        driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
   }
   if (driver->GetDumpStats()) {
@@ -458,8 +446,11 @@
     case kThumb2:
     case kArm: {
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      arm::InstructionSimplifierArm* simplifier =
+          new (arena) arm::InstructionSimplifierArm(graph, stats);
       HOptimization* arm_optimizations[] = {
-        fixups
+        fixups,
+        simplifier
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
@@ -577,17 +568,6 @@
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
-  size_t size = vector.size();
-  size_t aligned_size = RoundUp(size, 4);
-  for (; size < aligned_size; ++size) {
-    vector.push_back(0);
-  }
-  return ArrayRef<const uint8_t>(vector);
-}
-
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -601,10 +581,10 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
-                                                  CodeVectorAllocator* code_allocator,
-                                                  CodeGenerator* codegen,
-                                                  CompilerDriver* compiler_driver) const {
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+                                         CodeVectorAllocator* code_allocator,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
@@ -630,39 +610,6 @@
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::EmitBaseline(
-    ArenaAllocator* arena,
-    CodeVectorAllocator* code_allocator,
-    CodeGenerator* codegen,
-    CompilerDriver* compiler_driver) const {
-  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
-  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildMappingTable(&mapping_table);
-  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildVMapTable(&vmap_table);
-  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
-
-  CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
-      compiler_driver,
-      codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
-      // Follow Quick's behavior and set the frame size to zero if it is
-      // considered "empty" (see the definition of
-      // art::CodeGenerator::HasEmptyFrame).
-      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
-      codegen->GetCoreSpillMask(),
-      codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(),
-      AlignVectorSize(mapping_table),
-      AlignVectorSize(vmap_table),
-      AlignVectorSize(gc_map),
-      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>(linker_patches));
-  return compiled_method;
-}
-
 CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
                                               CodeVectorAllocator* code_allocator,
                                               const DexFile::CodeItem* code_item,
@@ -775,41 +722,37 @@
 
   VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
 
-  if (run_optimizations_) {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScopeCollection handles(soa.Self());
-    ScopedThreadSuspension sts(soa.Self(), kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  ScopedThreadSuspension sts(soa.Self(), kNative);
 
-    {
-      PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
-      if (result != kAnalysisSuccess) {
-        switch (result) {
-          case kAnalysisFailThrowCatchLoop:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
-            break;
-          case kAnalysisFailAmbiguousArrayOp:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
-            break;
-          case kAnalysisSuccess:
-            UNREACHABLE();
-        }
-        pass_observer.SetGraphInBadState();
-        return nullptr;
+  {
+    PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
+    GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+    if (result != kAnalysisSuccess) {
+      switch (result) {
+        case kAnalysisFailThrowCatchLoop:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+          break;
+        case kAnalysisFailAmbiguousArrayOp:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+          break;
+        case kAnalysisSuccess:
+          UNREACHABLE();
       }
+      pass_observer.SetGraphInBadState();
+      return nullptr;
     }
-
-    RunOptimizations(graph,
-                     codegen.get(),
-                     compiler_driver,
-                     compilation_stats_.get(),
-                     dex_compilation_unit,
-                     &pass_observer,
-                     &handles);
-    codegen->CompileOptimized(code_allocator);
-  } else {
-    codegen->CompileBaseline(code_allocator);
   }
+
+  RunOptimizations(graph,
+                   codegen.get(),
+                   compiler_driver,
+                   compilation_stats_.get(),
+                   dex_compilation_unit,
+                   &pass_observer,
+                   &handles);
+  codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
 
   if (kArenaAllocatorCountAllocations) {
@@ -861,11 +804,7 @@
                    dex_cache));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      if (run_optimizations_) {
-        method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
-      } else {
-        method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
-      }
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -928,8 +867,6 @@
   {
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(self, kNative);
-
-    DCHECK(run_optimizations_);
     codegen.reset(
         TryCompile(&arena,
                    &code_allocator,
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3..be470cc 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@
 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
   // This function is used to reduce the dependencies in the graph after
   // (from -> to) has been performed. Since we ensure there is no move with the same
-  // destination, (to -> X) can not be blocked while (from -> X) might still be
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
   // a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 1c25e48..527c242 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -58,7 +58,6 @@
   void VisitCheckCast(HCheckCast* instr) OVERRIDE;
   void VisitBoundType(HBoundType* instr) OVERRIDE;
   void VisitNullCheck(HNullCheck* instr) OVERRIDE;
-  void VisitFakeString(HFakeString* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
                                uint16_t type_idx,
                                const DexFile& dex_file,
@@ -568,10 +567,6 @@
   }
 }
 
-void RTPVisitor::VisitFakeString(HFakeString* instr) {
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
-}
-
 void RTPVisitor::VisitBoundType(HBoundType* instr) {
   ScopedObjectAccess soa(Thread::Current());
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2bae4bc..a966b62 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -72,8 +72,7 @@
   float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
   double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
 
-  static constexpr bool kIsBaseline = false;
-  codegen->SetupBlockedRegisters(kIsBaseline);
+  codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
   physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
   // Always reserve for the current method and the graph's max out registers.
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 207e3f3..165d09d 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@
   return true;
 }
 
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+  if (GetGraph()->IsDebuggable()) {
+    // Do not perform the optimization for consistency with the interpreter
+    // which always allocates an object for new-instance of String.
+    return;
+  }
+
+  for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsStringAlloc());
+
+    // Replace NewInstance of String with NullConstant if not used prior to
+    // calling StringFactory. In case of deoptimization, the interpreter is
+    // expected to skip null check on the `this` argument of the StringFactory call.
+    if (!new_instance->HasNonEnvironmentUses()) {
+      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+      // Remove LoadClass if not needed any more.
+      HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+      DCHECK(load_class != nullptr);
+      DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      if (!load_class->HasUses()) {
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
+    }
+  }
+}
+
 GraphAnalysisResult SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
@@ -487,7 +515,15 @@
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Clear locals.
+  // 11) Step 1) replaced uses of NewInstances of String with the results of
+  // their corresponding StringFactory calls. Unless the String objects are used
+  // before they are initialized, they can be replaced with NullConstant.
+  // Note that this optimization is valid only if unsimplified code does not use
+  // the uninitialized value because we assume execution can be deoptimized at
+  // any safepoint. We must therefore perform it before any other optimizations.
+  RemoveRedundantUninitializedStrings();
+
+  // 12) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -885,4 +921,31 @@
   VisitInstruction(aset);
 }
 
+void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  VisitInstruction(invoke);
+
+  if (invoke->IsStringInit()) {
+    // This is a StringFactory call which acts as a String constructor. Its
+    // result replaces the empty String pre-allocated by NewInstance.
+    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
+
+    // Replacing the NewInstance might render it redundant. Keep a list of these
+    // to be visited once it is clear whether it has remaining uses.
+    if (arg_this->IsNewInstance()) {
+      uninitialized_strings_.push_back(arg_this->AsNewInstance());
+    } else {
+      DCHECK(arg_this->IsPhi());
+      // NewInstance is not the direct input of the StringFactory call. It might
+      // be redundant but optimizing this case is not worth the effort.
+    }
+
+    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      if ((*current_locals_)[vreg] == arg_this) {
+        (*current_locals_)[vreg] = invoke;
+      }
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 743dabd..ccef8ea 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -57,6 +57,7 @@
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
@@ -70,13 +71,14 @@
   ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
 
-  void VisitBasicBlock(HBasicBlock* block);
-  void VisitLoadLocal(HLoadLocal* load);
-  void VisitStoreLocal(HStoreLocal* store);
-  void VisitInstruction(HInstruction* instruction);
-  void VisitTemporary(HTemporary* instruction);
-  void VisitArrayGet(HArrayGet* aget);
-  void VisitArraySet(HArraySet* aset);
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
+  void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
+  void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitTemporary(HTemporary* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* aget) OVERRIDE;
+  void VisitArraySet(HArraySet* aset) OVERRIDE;
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
@@ -104,6 +106,8 @@
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
   HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
 
+  void RemoveRedundantUninitializedStrings();
+
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
@@ -118,6 +122,7 @@
 
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
+  ArenaVector<HNewInstance*> uninitialized_strings_;
 
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
index 81f2a56..85335ef 100644
--- a/compiler/profile_assistant.cc
+++ b/compiler/profile_assistant.cc
@@ -16,54 +16,154 @@
 
 #include "profile_assistant.h"
 
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
 namespace art {
 
 // Minimum number of new methods that profiles must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
 
-bool ProfileAssistant::ProcessProfiles(
-      const std::vector<std::string>& profile_files,
-      const std::vector<std::string>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+bool ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const std::vector<ScopedFlock>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
   DCHECK(!profile_files.empty());
   DCHECK(reference_profile_files.empty() ||
       (profile_files.size() == reference_profile_files.size()));
 
   std::vector<ProfileCompilationInfo> new_info(profile_files.size());
   bool should_compile = false;
   // Read the main profile files.
-  for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!new_info[i].Load(profile_files[i])) {
-      LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return false;
     }
     // Do we have enough new profiled methods that will make the compilation worthwhile?
     should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
   }
+
   if (!should_compile) {
-    *profile_compilation_info = nullptr;
     return true;
   }
 
   std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+  // Merge information.
   for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    result->Load(new_info[i]);
-    // If we have any reference profile information merge their information with
-    // the current profiles and save them back to disk.
     if (!reference_profile_files.empty()) {
-      if (!new_info[i].Load(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+      if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not load reference profile file at index " << i;
         return false;
       }
-      if (!new_info[i].Save(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+    }
+    // Merge all data into a single object.
+    if (!result->Load(new_info[i])) {
+      LOG(WARNING) << "Could not merge profile data at index " << i;
+      return false;
+    }
+  }
+  // We were successful in merging all profile information. Update the files.
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!reference_profile_files.empty()) {
+      if (!reference_profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear reference profile file at index " << i;
+        return false;
+      }
+      if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not save reference profile file at index " << i;
+        return false;
+      }
+      if (!profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear profile file at index " << i;
         return false;
       }
     }
   }
+
   *profile_compilation_info = result.release();
   return true;
 }
 
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      // We do not own the descriptor, so disable auto-close and don't check usage.
+      File file(fds[i], false);
+      file.DisableAutoClose();
+      if (!flocks_[i].Init(&file, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<uint32_t>& profile_files_fd,
+        const std::vector<uint32_t>& reference_profile_files_fd,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::vector<std::string>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
 }  // namespace art
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
index 088c8bd..ad5e216 100644
--- a/compiler/profile_assistant.h
+++ b/compiler/profile_assistant.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "base/scoped_flock.h"
 #include "jit/offline_profiling_info.h"
 
 namespace art {
@@ -52,7 +53,17 @@
       const std::vector<std::string>& reference_profile_files,
       /*out*/ ProfileCompilationInfo** profile_compilation_info);
 
+  static bool ProcessProfiles(
+      const std::vector<uint32_t>& profile_files_fd_,
+      const std::vector<uint32_t>& reference_profile_files_fd_,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
  private:
+  static bool ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const std::vector<ScopedFlock>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
 
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
new file mode 100644
index 0000000..58b7513
--- /dev/null
+++ b/compiler/profile_assistant_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "compiler/profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  uint32_t GetFd(const ScratchFile& file) const {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+  // The reference profile info will contain the methods with indices 50-149.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info1;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
+      &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
+  ProfileCompilationInfo reference_info2;
+  SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
+      &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Load(reference_info1));
+  ASSERT_TRUE(expected.Load(reference_info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transferred to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ProfileCompilationInfo merge1;
+  ASSERT_TRUE(merge1.Load(info1));
+  ASSERT_TRUE(merge1.Load(reference_info1));
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(merge1));
+
+  ProfileCompilationInfo file_info2;
+  ProfileCompilationInfo merge2;
+  ASSERT_TRUE(merge2.Load(info2));
+  ASSERT_TRUE(merge2.Load(reference_info2));
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(merge2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make the merging of information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make the merging of information fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
+  ASSERT_TRUE(file_info2.Equals(reference_info));
+}
+
+}  // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c..e57a540 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <map>
 #include <vector>
+#include <zlib.h>
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -161,7 +162,6 @@
     uint32_t total_size = data_section_offset + data_section_size;
 
     dex_file_data_.resize(total_size);
-    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     for (const auto& entry : strings_) {
       CHECK_LT(entry.first.size(), 128u);
@@ -210,7 +210,12 @@
       Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
     }
 
-    // Leave checksum and signature as zeros.
+    // Leave signature as zeros.
+
+    header->file_size_ = dex_file_data_.size();
+    size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+    header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip);
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c..7138a46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
   EmitOperand(dst, src);
 }
 
+void X86Assembler::popcntl(Register dst, Register src) {  // Register source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Byte sequence F3 0F B8 /r is POPCNT r32, r/m32 per the Intel SDM.
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {  // Memory source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Same F3 0F B8 opcode bytes as the register form.
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c..759a41e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+
   void bsfl(Register dst, Register src);
   void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void popcntl(Register dst, Register src);
+  void popcntl(Register dst, const Address& src);
+
   void rorl(Register reg, const Immediate& imm);
   void rorl(Register operand, Register shifter);
   void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c..0fd0982 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@
   DriverStr(expected, "bsrl_address");
 }
 
+TEST_F(AssemblerX86Test, Popcntl) {  // Register-register form of popcntl.
+  DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {  // Memory-source form: base + index * 4 + 12.
+  GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
 // Rorl only allows CL as the shift count.
 std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
   std::ostringstream str;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db07267..10f5a00 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {  // Register source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // POPCNT is F3 0F B8 /r (Intel SDM); F3 is emitted before any REX byte.
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {  // Memory source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // F3 is emitted first, then the optional REX byte, then 0F B8.
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {  // Register source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Same F3 0F B8 bytes as popcntl; only the REX emission differs.
+  EmitRex64(dst, src);  // 64-bit variant: EmitRex64 where popcntl uses EmitOptionalRex32.
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {  // Memory source form.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Same F3 0F B8 bytes as popcntl; only the REX emission differs.
+  EmitRex64(dst, src);  // 64-bit variant: EmitRex64 where popcntl uses EmitOptionalRex32.
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..6f0847e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void popcntl(CpuRegister dst, CpuRegister src);
+  void popcntl(CpuRegister dst, const Address& src);
+  void popcntq(CpuRegister dst, CpuRegister src);
+  void popcntq(CpuRegister dst, const Address& src);
+
   void rorl(CpuRegister reg, const Immediate& imm);
   void rorl(CpuRegister operand, CpuRegister shifter);
   void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..8a87fca 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@
   DriverStr(expected, "bsrq_address");
 }
 
+TEST_F(AssemblerX86_64Test, Popcntl) {  // Uses Repeatrr; the Popcntq test uses RepeatRR.
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(  // R10 as dst.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(  // R10 as base.
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(  // R9 as index.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+    "popcntl 0xc(%R10,%RBX,4), %edi\n"
+    "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {  // Register-register form of popcntq.
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(  // R10 as dst.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(  // R10 as base.
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(  // R9 as index.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+    "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+    "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "popcntq_address");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 855e545..918a01b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -340,6 +340,12 @@
   UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
   UsageError("      specified together with --profile-file.");
   UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used with --reference-profile-file.");
+  UsageError("");  // Blank separator, as after the other option entries.
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
   UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
@@ -497,12 +503,24 @@
   return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
 }
 
+static void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
+  for (size_t index = 0, count = fds.size(); index != count; ++index) {
+    if (close(fds[index]) >= 0) continue;  // Closed fine; only failures are reported.
+    // Best effort: a failed close() is logged as a warning but never aborts the loop.
+    PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << index;
+  }
+}
+
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
       compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0),
+      image_patch_delta_(0),
+      key_value_store_(nullptr),
       verification_results_(nullptr),
       method_inliner_map_(),
       runtime_(nullptr),
@@ -522,23 +540,23 @@
       boot_image_(false),
       multi_image_(false),
       is_host_(false),
+      class_loader_(nullptr),
+      elf_writers_(),
+      oat_writers_(),
+      rodata_(),
       image_writer_(nullptr),
       driver_(nullptr),
+      opened_dex_files_maps_(),
+      opened_dex_files_(),
       dump_stats_(false),
       dump_passes_(false),
       dump_timing_(false),
       dump_slow_timing_(kIsDebugBuild),
-      dump_cfg_append_(false),
       swap_fd_(-1),
       app_image_fd_(kInvalidImageFd),
       timings_(timings) {}
 
   ~Dex2Oat() {
-    // Free opened dex files before deleting the runtime_, because ~DexFile
-    // uses MemMap, which is shut down by ~Runtime.
-    class_path_files_.clear();
-    opened_dex_files_.clear();
-
     // Log completion time before deleting the runtime_, because this accesses
     // the runtime.
     LogCompletionTime();
@@ -551,6 +569,9 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
+      for (std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+        map.release();
+      }
       for (std::unique_ptr<File>& oat_file : oat_files_) {
         oat_file.release();
       }
@@ -576,6 +597,14 @@
     ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
   }
 
+  void ParseFdForCollection(const StringPiece& option,  // Full "--name=<number>" argument.
+                            const char* arg_name,  // Option name handed to ParseUintOption.
+                            std::vector<uint32_t>* fds) {  // Out: parsed fd is appended here.
+    uint32_t fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
   void ParseJ(const StringPiece& option) {
     ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
@@ -779,11 +808,25 @@
       }
     }
 
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
     if (!profile_files_.empty()) {
       if (!reference_profile_files_.empty() &&
           (reference_profile_files_.size() != profile_files_.size())) {
         Usage("If specified, --reference-profile-file should match the number of --profile-file.");
       }
+    } else if (!reference_profile_files_.empty()) {
+      Usage("--reference-profile-file should only be supplied with --profile-file");
+    }
+    if (!profile_files_fd_.empty()) {
+      if (!reference_profile_files_fd_.empty() &&
+          (reference_profile_files_fd_.size() != profile_files_fd_.size())) {
+        Usage("If specified, --reference-profile-file-fd should match the number"
+              " of --profile-file-fd.");  // Adjacent literals: one message, no stray argument.
+      }
+    } else if (!reference_profile_files_fd_.empty()) {
+      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
 
     if (!parser_options->oat_symbols.empty()) {
@@ -1077,6 +1120,10 @@
       } else if (option.starts_with("--reference-profile-file=")) {
         reference_profile_files_.push_back(
             option.substr(strlen("--reference-profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseFdForCollection(option, "--reference-profile-file-fd", &reference_profile_files_fd_);
       } else if (option == "--no-profile-file") {
         // No profile
       } else if (option == "--host") {
@@ -1093,10 +1140,6 @@
         dump_timing_ = true;
       } else if (option == "--dump-passes") {
         dump_passes_ = true;
-      } else if (option.starts_with("--dump-cfg=")) {
-        dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-      } else if (option.starts_with("--dump-cfg-append")) {
-        dump_cfg_append_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
       } else if (option.starts_with("--swap-file=")) {
@@ -1130,6 +1173,9 @@
   // Check whether the oat output files are writable, and open them for later. Also open a swap
   // file, if a name is given.
   bool OpenFile() {
+    // Prune non-existent dex files now so that we don't create empty oat files for multi-image.
+    PruneNonExistentDexFiles();
+
     // Expand oat and image filenames for multi image.
     if (IsBootImage() && multi_image_) {
       ExpandOatAndImageFilenames();
@@ -1201,9 +1247,6 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
-    // Organize inputs, handling multi-dex and multiple oat file outputs.
-    CreateDexOatMappings();
-
     return true;
   }
 
@@ -1246,89 +1289,135 @@
       return false;
     }
 
+    CreateOatWriters();
+    if (!AddDexFileSources()) {
+      return false;
+    }
+
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store.
+      // We need this for the multi-image case.
+      key_value_store_->Put(OatHeader::kBootClassPath, GetMultiImageBootClassPath());
+    }
+
+    if (!IsBootImage()) {
+      // When compiling an app, create the runtime early to retrieve
+      // the image location key needed for the oat header.
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+
+      {
+        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_data_begin_ =
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        // Store the boot image filename(s).
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        std::string image_file_location = Join(image_filenames, ':');
+        if (!image_file_location.empty()) {
+          key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
+        }
+      }
+
+      // Open dex files for class path.
+      const std::vector<std::string> class_path_locations =
+          GetClassPathLocations(runtime_->GetClassPathString());
+      OpenClassPathFiles(class_path_locations, &class_path_files_);
+
+      // Store the classpath we have right now.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      key_value_store_->Put(OatHeader::kClassPathKey,
+                            OatFile::EncodeDexFileDependencies(class_path_files));
+    }
+
+    // Now that we have finalized key_value_store_, start writing the oat file.
     {
-      TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
+      TimingLogger::ScopedTiming t_dex("Writing and opening dex files", timings_);
+      rodata_.reserve(oat_writers_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        rodata_.push_back(elf_writers_[i]->StartRoData());
+        // Unzip or copy dex files straight to the oat file.
+        std::unique_ptr<MemMap> opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        if (!oat_writers_[i]->WriteAndOpenDexFiles(rodata_.back(),
+                                                   oat_files_[i].get(),
+                                                   instruction_set_,
+                                                   instruction_set_features_.get(),
+                                                   key_value_store_.get(),
+                                                   &opened_dex_files_map,
+                                                   &opened_dex_files)) {
+          return false;
+        }
+        dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
+        if (opened_dex_files_map != nullptr) {
+          opened_dex_files_maps_.push_back(std::move(opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+            dex_file_oat_filename_map_.emplace(dex_file.get(), oat_filenames_[i]);
+            opened_dex_files_.push_back(std::move(dex_file));
+          }
+        } else {
+          DCHECK(opened_dex_files.empty());
+        }
+      }
+    }
+
+    dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
+    if (IsBootImage()) {
+      // For boot image, pass opened dex files to the Runtime::Create().
+      // Note: Runtime acquires ownership of these dex files.
+      runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
       if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
 
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now so that we don't starve GC.
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kNative);
     // If we're doing the image, override the compiler filter to force full compilation. Must be
     // done ahead of WellKnownClasses::Init that causes verification.  Note: doesn't force
     // compilation of class initializers.
     // Whilst we're in native take the opportunity to initialize well known classes.
+    Thread* self = Thread::Current();
     WellKnownClasses::Init(self->GetJniEnv());
 
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_filename_.empty()) {
-      dex_files_ = class_linker->GetBootClassPath();
-      // Prune invalid dex locations.
-      for (size_t i = 0; i < dex_locations_.size(); i++) {
-        const char* dex_location = dex_locations_[i];
-        bool contains = false;
-        for (const DexFile* dex_file : dex_files_) {
-          if (strcmp(dex_location, dex_file->GetLocation().c_str()) == 0) {
-            contains = true;
-            break;
-          }
-        }
-        if (!contains) {
-          dex_locations_.erase(dex_locations_.begin() + i);
-          i--;
-        }
-      }
-    } else {
-      TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
-      if (dex_filenames_.empty()) {
-        ATRACE_BEGIN("Opening zip archive from file descriptor");
-        std::string error_msg;
-        std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd_,
-                                                                       zip_location_.c_str(),
-                                                                       &error_msg));
-        if (zip_archive.get() == nullptr) {
-          LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location_ << "': "
-              << error_msg;
-          return false;
-        }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
-          LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
-              << "': " << error_msg;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-        ATRACE_END();
-      } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
-        if (failure_count > 0) {
-          LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-      }
-
+    if (!IsBootImage()) {
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
         SaveDexInput();
       }
+
+      // Handle and ClassLoader creation needs to come after Runtime::Create.
+      ScopedObjectAccess soa(self);
+
+      // Classpath: first the class-path given.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+
+      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+
+      class_loader_ = class_linker->CreatePathClassLoader(self, class_path_files);
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
-    // the dex caches stay live since we don't want class unloading to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      if (!dex_file->EnableWrite()) {
-        PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
+
+    // Ensure opened dex files are writable for dex-to-dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Protect(PROT_READ | PROT_WRITE)) {
+        PLOG(ERROR) << "Failed to make .dex files writeable.";
+        return false;
       }
+    }
+
+    // Ensure that the dex caches stay live since we don't want class unloading
+    // to occur during compilation.
+    for (const auto& dex_file : dex_files_) {
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
           class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-      dex_file->CreateTypeLookupTable();
     }
 
     /*
@@ -1353,59 +1442,11 @@
     return true;
   }
 
-  void CreateDexOatMappings() {
-    if (oat_files_.size() > 1) {
-      size_t index = 0;
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*> dex_files;
-        if (index < dex_files_.size()) {
-          dex_files.push_back(dex_files_[index]);
-          dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-          index++;
-          while (index < dex_files_.size() &&
-              (dex_files_[index]->GetBaseLocation() == dex_files_[index - 1]->GetBaseLocation())) {
-            dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-            dex_files.push_back(dex_files_[index]);
-            index++;
-          }
-        }
-        dex_files_per_oat_file_.push_back(std::move(dex_files));
-      }
-    } else {
-      dex_files_per_oat_file_.push_back(dex_files_);
-      for (const DexFile* dex_file : dex_files_) {
-        dex_file_oat_filename_map_.emplace(dex_file, oat_filenames_[0]);
-      }
-    }
-  }
-
   // Create and invoke the compiler driver. This will compile all the dex files.
   void Compile() {
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
-    // Handle and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = nullptr;
-    Thread* self = Thread::Current();
-
-    if (!boot_image_filename_.empty()) {
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
-      ScopedObjectAccess soa(self);
-
-      // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
-
-      // Store the classpath we have right now.
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
-
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
-
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
-    }
-
     // Find the dex file we should not inline from.
 
     // For now, on the host always have core-oj removed.
@@ -1453,49 +1494,6 @@
       }
     }
 
-    if (IsBootImage() && image_filenames_.size() > 1) {
-      // If we're compiling the boot image, store the boot classpath into the Key-Value store. If
-      // the image filename was adapted (e.g., for our tests), we need to change this here, too, but
-      // need to strip all path components (they will be re-established when loading).
-      // We need this for the multi-image case.
-      std::ostringstream bootcp_oss;
-      bool first_bootcp = true;
-      for (size_t i = 0; i < dex_locations_.size(); ++i) {
-        if (!first_bootcp) {
-          bootcp_oss << ":";
-        }
-
-        std::string dex_loc = dex_locations_[i];
-        std::string image_filename = image_filenames_[i];
-
-        // Use the dex_loc path, but the image_filename name (without path elements).
-        size_t dex_last_slash = dex_loc.rfind('/');
-
-        // npos is max(size_t). That makes this a bit ugly.
-        size_t image_last_slash = image_filename.rfind('/');
-        size_t image_last_at = image_filename.rfind('@');
-        size_t image_last_sep = (image_last_slash == std::string::npos)
-                                    ? image_last_at
-                                    : (image_last_at == std::string::npos)
-                                          ? std::string::npos
-                                          : std::max(image_last_slash, image_last_at);
-        // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
-
-        if (dex_last_slash == std::string::npos) {
-          dex_loc = image_filename.substr(image_last_sep + 1);
-        } else {
-          dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
-              image_filename.substr(image_last_sep + 1);
-        }
-
-        // Image filenames already end with .art, no need to replace.
-
-        bootcp_oss << dex_loc;
-        first_bootcp = false;
-      }
-      key_value_store_->Put(OatHeader::kBootClassPath, bootcp_oss.str());
-    }
-
     driver_.reset(new CompilerDriver(compiler_options_.get(),
                                      verification_results_.get(),
                                      &method_inliner_map_,
@@ -1509,14 +1507,12 @@
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
-                                     dump_cfg_file_name_,
-                                     dump_cfg_append_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
                                      &dex_file_oat_filename_map_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1584,19 +1580,18 @@
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFiles() {
-    CHECK(key_value_store_.get() != nullptr);
-
+  bool WriteOatFiles() {
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::vector<std::unique_ptr<OatWriter>> oat_writers;
-    {
-      TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
-      std::string image_file_location;
-      uint32_t image_file_location_oat_checksum = 0;
-      uintptr_t image_file_location_oat_data_begin = 0;
-      int32_t image_patch_delta = 0;
+    // Sync the data to the file, in case we did dex2dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Sync()) {
+        PLOG(ERROR) << "Failed to Sync() dex2dex output. Map: " << map->GetName();
+        return false;
+      }
+    }
 
+    if (IsImage()) {
       if (app_image_ && image_base_ == 0) {
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
@@ -1608,47 +1603,15 @@
         VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
       }
 
-      if (IsImage()) {
-        PrepareImageWriter(image_base_);
-      }
+      image_writer_.reset(new ImageWriter(*driver_,
+                                          image_base_,
+                                          compiler_options_->GetCompilePic(),
+                                          IsAppImage(),
+                                          image_storage_mode_,
+                                          oat_filenames_,
+                                          dex_file_oat_filename_map_));
 
-      if (!IsBootImage()) {
-        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        std::vector<gc::space::ImageSpace*> image_spaces =
-            Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum = image_spaces[0]->GetImageHeader().GetOatChecksum();
-        image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
-        image_patch_delta = image_spaces[0]->GetImageHeader().GetPatchDelta();
-        std::vector<std::string> image_filenames;
-        for (const gc::space::ImageSpace* image_space : image_spaces) {
-          image_filenames.push_back(image_space->GetImageFilename());
-        }
-        image_file_location = Join(image_filenames, ':');
-      }
-
-      if (!image_file_location.empty()) {
-        key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
-      }
-
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
-        std::unique_ptr<OatWriter> oat_writer(new OatWriter(dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_data_begin,
-                                                            image_patch_delta,
-                                                            driver_.get(),
-                                                            image_writer_.get(),
-                                                            IsBootImage(),
-                                                            timings_,
-                                                            key_value_store_.get()));
-        oat_writers.push_back(std::move(oat_writer));
-      }
-    }
-
-    if (IsImage()) {
-      // The OatWriter constructor has already updated offsets in methods and we need to
-      // prepare method offsets in the image address space for direct method patching.
+      // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
       if (!image_writer_->PrepareImageAddressSpace()) {
         LOG(ERROR) << "Failed to prepare image address space.";
@@ -1658,20 +1621,22 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         std::unique_ptr<File>& oat_file = oat_files_[i];
-        std::unique_ptr<OatWriter>& oat_writer = oat_writers[i];
-        std::unique_ptr<ElfWriter> elf_writer =
-            CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get());
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
-        elf_writer->Start();
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files);
 
-        OutputStream* rodata = elf_writer->StartRoData();
+        OutputStream*& rodata = rodata_[i];
+        DCHECK(rodata != nullptr);
         if (!oat_writer->WriteRodata(rodata)) {
           LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
           return false;
         }
         elf_writer->EndRoData(rodata);
+        rodata = nullptr;
 
         OutputStream* text = elf_writer->StartText();
         if (!oat_writer->WriteCode(text)) {
@@ -1680,6 +1645,14 @@
         }
         elf_writer->EndText(text);
 
+        if (!oat_writer->WriteHeader(elf_writer->GetStream(),
+                                     image_file_location_oat_checksum_,
+                                     image_file_location_oat_data_begin_,
+                                     image_patch_delta_)) {
+          LOG(ERROR) << "Failed to write oat header to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+
         elf_writer->SetBssSize(oat_writer->GetBssSize());
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
@@ -1705,6 +1678,9 @@
         }
 
         VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
+
+        oat_writer.reset();
+        elf_writer.reset();
       }
     }
 
@@ -1822,17 +1798,27 @@
   }
 
   bool UseProfileGuidedCompilation() const {
-    return !profile_files_.empty();
+    return !profile_files_.empty() || !profile_files_fd_.empty();  // Given by path or by fd.
   }
 
   bool ProcessProfiles() {
     DCHECK(UseProfileGuidedCompilation());
     ProfileCompilationInfo* info = nullptr;
-    if (ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_files_, &info)) {
-      profile_compilation_info_.reset(info);
-      return true;
+    bool result = false;
+    if (profile_files_.empty()) {  // No profile paths were given, so the fd lists are used.
+      DCHECK(!profile_files_fd_.empty());
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_fd_, reference_profile_files_fd_, &info);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+      CloseAllFds(reference_profile_files_fd_, "reference_profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_, reference_profile_files_, &info);
     }
-    return false;
+    // Keep whatever was stored in info (still nullptr if nothing was), whatever the result.
+    profile_compilation_info_.reset(info);
+
+    return result;
   }
 
   bool ShouldCompileBasedOnProfiles() const {
@@ -1852,65 +1838,78 @@
     return result;
   }
 
-  static size_t OpenDexFiles(std::vector<const char*>& dex_filenames,
-                             std::vector<const char*>& dex_locations,
-                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
-    size_t failure_count = 0;
-    for (size_t i = 0; i < dex_filenames.size(); i++) {
-      const char* dex_filename = dex_filenames[i];
-      const char* dex_location = dex_locations[i];
-      ATRACE_BEGIN(StringPrintf("Opening dex file '%s'", dex_filenames[i]).c_str());
-      std::string error_msg;
-      if (!OS::FileExists(dex_filename)) {
-        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
-        dex_filenames.erase(dex_filenames.begin() + i);
-        dex_locations.erase(dex_locations.begin() + i);
-        i--;
-        continue;
+  std::string GetMultiImageBootClassPath() {  // ':'-joined, one entry per dex location.
+    DCHECK(IsBootImage());
+    DCHECK_GT(oat_filenames_.size(), 1u);
+    // If the image filename was adapted (e.g., for our tests), we need to change this here,
+    // too, but need to strip all path components (they will be re-established when loading).
+    std::ostringstream bootcp_oss;
+    bool first_bootcp = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      if (!first_bootcp) {
+        bootcp_oss << ":";
       }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
-        LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
-        ++failure_count;
+
+      std::string dex_loc = dex_locations_[i];
+      std::string image_filename = image_filenames_[i];
+
+      // Use the dex_loc path, but the image_filename name (without path elements).
+      size_t dex_last_slash = dex_loc.rfind('/');
+
+      // Pick the last '/' or '@'. npos is max(size_t), so std::max alone would pick npos.
+      size_t image_last_slash = image_filename.rfind('/');
+      size_t image_last_at = image_filename.rfind('@');
+      size_t image_last_sep = (image_last_slash == std::string::npos)
+                                  ? image_last_at
+                                  : (image_last_at == std::string::npos)
+                                        ? image_last_slash
+                                        : std::max(image_last_slash, image_last_at);
+      // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+      if (dex_last_slash == std::string::npos) {
+        dex_loc = image_filename.substr(image_last_sep + 1);
+      } else {
+        dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+            image_filename.substr(image_last_sep + 1);
       }
-      ATRACE_END();
+
+      // Image filenames already end with .art, no need to replace.
+
+      bootcp_oss << dex_loc;
+      first_bootcp = false;
     }
-    return failure_count;
+    return bootcp_oss.str();
   }
 
-  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
-  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
-  // efficient, but under normal circumstances the list is neither large nor is this part too
-  // sensitive.
-  static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
-                               const std::string& location) {
-    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
-    for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
-          canonical_location) {
-        return true;
-      }
+  std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
+    // Apps only (exactly one oat file): returns class_path entries not among its dex sources.
+    DCHECK(!IsBootImage());
+    DCHECK_EQ(oat_writers_.size(), 1u);
+    std::vector<std::string> dex_files_canonical_locations;
+    for (const char* location : oat_writers_[0]->GetSourceLocations()) {
+      dex_files_canonical_locations.push_back(DexFile::GetDexCanonicalLocation(location));
     }
-    return false;
-  }
 
-  // Appends to opened_dex_files any elements of class_path that dex_files
-  // doesn't already contain. This will open those dex files as necessary.
-  static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*> dex_files,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
-    // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
-    ScopedObjectAccess soa(Thread::Current());
-    for (size_t i = 0; i < parsed.size(); ++i) {
-      if (DexFilesContains(dex_files, parsed[i])) {
-        continue;
-      }
+    auto kept_it = std::remove_if(parsed.begin(),
+                                  parsed.end(),
+                                  [&dex_files_canonical_locations](const std::string& location) {
+      return ContainsElement(dex_files_canonical_locations,
+                             DexFile::GetDexCanonicalLocation(location.c_str()));
+    });
+    parsed.erase(kept_it, parsed.end());
+    return parsed;
+  }
+
+  // Opens each class path location into opened_dex_files; failures are only logged as warnings.
+  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+    for (const std::string& location : class_path_locations) {
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
+      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+        LOG(WARNING) << "Failed to open dex file '" << location << "': " << error_msg;
       }
     }
   }
@@ -1985,6 +1984,63 @@
     return true;
   }
 
+  // Removes every dex filename whose file does not exist, together with its paired location.
+  void PruneNonExistentDexFiles() {
+    DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+    size_t write_pos = 0u;
+    for (size_t read_pos = 0u, end = dex_filenames_.size(); read_pos != end; ++read_pos) {
+      if (OS::FileExists(dex_filenames_[read_pos])) {
+        dex_filenames_[write_pos] = dex_filenames_[read_pos];
+        dex_locations_[write_pos++] = dex_locations_[read_pos];
+      } else {
+        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filenames_[read_pos] << "'";
+      }
+    }
+    dex_filenames_.resize(write_pos);
+    dex_locations_.resize(write_pos);
+  }
+
+  // Adds the dex inputs to the oat writer(s): the zip fd, one file per writer, or all to one.
+  bool AddDexFileSources() {
+    TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
+    if (zip_fd_ != -1) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+        return false;
+      }
+      return true;
+    }
+    const bool multi_image = oat_writers_.size() > 1u;
+    if (multi_image) {
+      DCHECK_EQ(oat_writers_.size(), dex_filenames_.size());
+      DCHECK_EQ(oat_writers_.size(), dex_locations_.size());
+    } else {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+      DCHECK_NE(dex_filenames_.size(), 0u);
+    }
+    const size_t num_sources = multi_image ? oat_writers_.size() : dex_filenames_.size();
+    for (size_t i = 0; i != num_sources; ++i) {
+      OatWriter* const writer = oat_writers_[multi_image ? i : 0u].get();
+      if (!writer->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  void CreateOatWriters() {  // One started ElfWriter and one OatWriter per entry of oat_files_.
+    TimingLogger::ScopedTiming t2("CreateOatWriters", timings_);
+    elf_writers_.reserve(oat_files_.size());
+    oat_writers_.reserve(oat_files_.size());
+    for (size_t idx = 0, count = oat_files_.size(); idx != count; ++idx) {
+      elf_writers_.push_back(
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_files_[idx].get()));
+      elf_writers_.back()->Start();
+      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+    }
+  }
+
   void SaveDexInput() {
     for (size_t i = 0; i < dex_files_.size(); ++i) {
       const DexFile* dex_file = dex_files_[i];
@@ -2048,8 +2104,8 @@
   }
 
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
-      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options) {
+    TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
     if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
@@ -2070,18 +2126,12 @@
 
     runtime_->GetClassLinker()->RunRootClinits();
 
-    return true;
-  }
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
+    // Runtime::Start, give it away now so that we don't starve GC.
+    Thread* self = Thread::Current();
+    self->TransitionFromRunnableToSuspended(kNative);
 
-  void PrepareImageWriter(uintptr_t image_base) {
-    DCHECK(IsImage());
-    image_writer_.reset(new ImageWriter(*driver_,
-                                        image_base,
-                                        compiler_options_->GetCompilePic(),
-                                        IsAppImage(),
-                                        image_storage_mode_,
-                                        oat_filenames_,
-                                        dex_file_oat_filename_map_));
+    return true;
   }
 
   // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
@@ -2260,6 +2310,9 @@
   InstructionSet instruction_set_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
+  uint32_t image_file_location_oat_checksum_;
+  uintptr_t image_file_location_oat_data_begin_;
+  int32_t image_patch_delta_;
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
   std::unique_ptr<VerificationResults> verification_results_;
@@ -2267,11 +2320,11 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  std::unique_ptr<Runtime> runtime_;
+
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  std::unique_ptr<Runtime> runtime_;
-
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
@@ -2306,24 +2359,30 @@
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  jobject class_loader_;
 
+  std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
+  std::vector<std::unique_ptr<OatWriter>> oat_writers_;
+  std::vector<OutputStream*> rodata_;
   std::unique_ptr<ImageWriter> image_writer_;
   std::unique_ptr<CompilerDriver> driver_;
 
+  std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
-  std::string dump_cfg_file_name_;
-  bool dump_cfg_append_;
   std::string swap_file_name_;
   int swap_fd_;
   std::string app_image_file_name_;
   int app_image_fd_;
   std::vector<std::string> profile_files_;
   std::vector<std::string> reference_profile_files_;
+  std::vector<uint32_t> profile_files_fd_;
+  std::vector<uint32_t> reference_profile_files_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
@@ -2359,7 +2418,7 @@
 static int CompileImage(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2398,7 +2457,7 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2455,6 +2514,11 @@
     }
   }
 
+  // Check early that the result of compilation can be written
+  if (!dex2oat.OpenFile()) {
+    return EXIT_FAILURE;
+  }
+
   // Print the complete line when any of the following is true:
   //   1) Debug build
   //   2) Compiling an image
@@ -2468,11 +2532,6 @@
   }
 
   if (!dex2oat.Setup()) {
-    return EXIT_FAILURE;
-  }
-
-  // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..1f74c93 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -938,6 +938,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xB8:
+        opcode1 = "popcnt";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 69e767d..7b9ce5b 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -395,6 +395,9 @@
     os << "MAGIC:\n";
     os << oat_header.GetMagic() << "\n\n";
 
+    os << "LOCATION:\n";
+    os << oat_file_.GetLocation() << "\n\n";
+
     os << "CHECKSUM:\n";
     os << StringPrintf("0x%08x\n\n", oat_header.GetChecksum());
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 7170f73..d4b873e 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -378,8 +378,8 @@
           "memory");  // clobber.
 #elif defined(__mips__) && defined(__LP64__)
     __asm__ __volatile__ (
-        // Spill a0-a7 and t0-t3 which we say we don't clobber. May contain args.
-        "daddiu $sp, $sp, -96\n\t"
+        // Spill a0-a7 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -64\n\t"
         "sd $a0, 0($sp)\n\t"
         "sd $a1, 8($sp)\n\t"
         "sd $a2, 16($sp)\n\t"
@@ -388,10 +388,6 @@
         "sd $a5, 40($sp)\n\t"
         "sd $a6, 48($sp)\n\t"
         "sd $a7, 56($sp)\n\t"
-        "sd $t0, 64($sp)\n\t"
-        "sd $t1, 72($sp)\n\t"
-        "sd $t2, 80($sp)\n\t"
-        "sd $t3, 88($sp)\n\t"
 
         "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
         "sd %[referrer], 0($sp)\n\t"
@@ -427,18 +423,16 @@
         "ld $a5, 40($sp)\n\t"
         "ld $a6, 48($sp)\n\t"
         "ld $a7, 56($sp)\n\t"
-        "ld $t0, 64($sp)\n\t"
-        "ld $t1, 72($sp)\n\t"
-        "ld $t2, 80($sp)\n\t"
-        "ld $t3, 88($sp)\n\t"
-        "daddiu $sp, $sp, 96\n\t"
+        "daddiu $sp, $sp, 64\n\t"
 
         "move %[result], $v0\n\t"   // Store the call result.
         : [result] "=r" (result)
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-          "t8", "t9", "k0", "k1", "fp", "ra",
+        // Instead aliases t0-t3, register names $12-$15 has been used in the clobber list because
+        // t0-t3 are ambiguous.
+        : "at", "v0", "v1", "$12", "$13", "$14", "$15", "s0", "s1", "s2", "s3", "s4", "s5", "s6",
+          "s7", "t8", "t9", "k0", "k1", "fp", "ra",
           "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
           "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
           "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 42f5df4..da01ee4 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -50,6 +50,10 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_with_popcnt[] = {
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -69,6 +73,11 @@
                                                arraysize(x86_variants_prefer_locked_add_sync),
                                                variant);
 
+  bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+                                       arraysize(x86_variants_with_popcnt),
+                                       variant);
+
+  // Verify that variant is known.
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -77,10 +86,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -93,12 +102,15 @@
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
   bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
+  bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                                has_AVX, has_AVX2, prefers_locked_add);
+                                            has_AVX, has_AVX2, prefers_locked_add,
+                                            has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                             has_AVX, has_AVX2, prefers_locked_add);
+                                         has_AVX, has_AVX2, prefers_locked_add,
+                                         has_POPCNT);
   }
 }
 
@@ -138,12 +150,15 @@
   // No #define for memory synchronization preference.
   const bool prefers_locked_add = false;
 
+  // No #define for popcnt.
+  const bool has_POPCNT = false;
+
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -158,6 +173,7 @@
   bool has_AVX2 = false;
   // No cpuinfo for memory synchronization preference.
   const bool prefers_locked_add = false;
+  bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -183,6 +199,9 @@
           if (line.find("avx2") != std::string::npos) {
             has_AVX2 = true;
           }
+          if (line.find("popcnt") != std::string::npos) {
+            has_POPCNT = true;
+          }
         } else if (line.find("processor") != std::string::npos &&
             line.find(": 1") != std::string::npos) {
           smp = true;
@@ -195,10 +214,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -223,7 +242,8 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
+      (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -233,7 +253,8 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
+      (has_POPCNT_ ? kPopCntBitfield : 0);
 }
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -273,6 +294,11 @@
   } else {
     result += ",-lock_add";
   }
+  if (has_POPCNT_) {
+    result += ",popcnt";
+  } else {
+    result += ",-popcnt";
+  }
   return result;
 }
 
@@ -285,6 +311,7 @@
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
   bool prefers_locked_add = prefers_locked_add_;
+  bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -311,6 +338,10 @@
       prefers_locked_add = true;
     } else if (feature == "-lock_add") {
       prefers_locked_add = false;
+    } else if (feature == "popcnt") {
+      has_POPCNT = true;
+    } else if (feature == "-popcnt") {
+      has_POPCNT = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -318,10 +349,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2b845f8..1819654 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -62,6 +62,8 @@
 
   bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
 
+  bool HasPopCnt() const { return has_POPCNT_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -75,10 +77,17 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
-      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add) {
+                            bool has_AVX, bool has_AVX2,
+                            bool prefers_locked_add,
+                            bool has_POPCNT)
+      : InstructionSetFeatures(smp),
+        has_SSSE3_(has_SSSE3),
+        has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2),
+        has_AVX_(has_AVX),
+        has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add),
+        has_POPCNT_(has_POPCNT) {
   }
 
  private:
@@ -91,6 +100,7 @@
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
     kPrefersLockedAdd = 64,
+    kPopCntBitfield = 128,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -99,6 +109,7 @@
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
   const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
+  const bool has_POPCNT_;  // x86 population count
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index e8d01e6..a062c12 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,7 +40,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,7 +60,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_features->AsBitmap(), 207U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index b8000d0..aba7234 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,9 +74,10 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
+                               bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add) {
+                                  has_AVX2, prefers_locked_add, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 4562c64..78aeacf 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 71e0590..814cbd0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -26,16 +26,25 @@
 namespace art {
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  return Init(filename, O_CREAT | O_RDWR, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    file_.reset(OS::OpenFileWithFlags(filename, flags));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
     }
-    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
+    int operation = block ? LOCK_EX : (LOCK_EX | LOCK_NB);
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), operation));
+    if (flock_result != 0 && errno == EWOULDBLOCK) {  // flock() fails with -1 and sets errno.
+      *error_msg = StringPrintf("File '%s' is already locked", filename);
+      return false;
+    }
     if (flock_result != 0) {
       *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
       return false;
@@ -51,11 +60,23 @@
     if (stat_result != 0) {
       PLOG(WARNING) << "Failed to stat, will retry: " << filename;
       // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // Note that in theory we could race with someone here for a long time and end up retrying
+        // over and over again. This potential behavior does not fit well in the non-blocking
+        // semantics. Thus, if we are not required to block, return failure when racing.
+        return false;
+      }
     }
     if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
       LOG(WARNING) << "File changed while locking, will retry: " << filename;
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // See comment above.
+        return false;
+      }
     }
     return true;
   }
@@ -78,7 +99,7 @@
   return true;
 }
 
-File* ScopedFlock::GetFile() {
+File* ScopedFlock::GetFile() const {
   CHECK(file_.get() != nullptr);
   return file_.get();
 }
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 08612e3..cc22056 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -32,10 +32,15 @@
   // Attempts to acquire an exclusive file lock (see flock(2)) on the file
   // at filename, and blocks until it can do so.
   //
-  // Returns true if the lock could be acquired, or false if an error
-  // occurred. It is an error if the file does not exist, or if its inode
-  // changed (usually due to a new file being created at the same path)
-  // between attempts to lock it.
+  // Returns true if the lock could be acquired, or false if an error occurred.
+  // It is an error if its inode changed (usually due to a new file being
+  // created at the same path) between attempts to lock it. In blocking mode,
+  // locking will be retried if the file changed. In non-blocking mode, false
+  // is returned and no attempt is made to re-acquire the lock.
+  //
+  // The file is opened with the provided flags.
+  bool Init(const char* filename, int flags, bool block, std::string* error_msg);
+  // Calls Init(filename, O_CREAT | O_RDWR, true, error_msg)
   bool Init(const char* filename, std::string* error_msg);
   // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
   // Returns true if the lock could be acquired or false if an error
@@ -43,7 +48,7 @@
   bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
-  File* GetFile();
+  File* GetFile() const;
 
   // Returns whether a file is held.
   bool HasFile();
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 78bc3d5..e17bebb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -316,4 +316,21 @@
   guard_state_ = GuardState::kNoCheck;
 }
 
+bool FdFile::ClearContent() {
+  if (SetLength(0) < 0) {
+    PLOG(art::ERROR) << "Failed to reset the length";
+    return false;
+  }
+  return ResetOffset();
+}
+
+bool FdFile::ResetOffset() {
+  off_t rc =  TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(art::ERROR) << "Failed to reset the offset";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 231a1ab..1e2d8af 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -79,6 +79,11 @@
 
   // Copy data from another file.
   bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+  // Clears the file content and resets the file offset to 0.
+  // Returns true upon success, false otherwise.
+  bool ClearContent();
+  // Resets the file offset to the beginning of the file.
+  bool ResetOffset();
 
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index ddd285a..ed833c4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1880,6 +1880,9 @@
    */
   Dbg::PostClassPrepare(h_new_class.Get());
 
+  // Notify native debugger of the new class and its layout.
+  jit::Jit::NewTypeLoadedIfUsingJit(h_new_class.Get());
+
   return h_new_class.Get();
 }
 
@@ -2766,6 +2769,7 @@
 
   mirror::Class* existing = InsertClass(descriptor, new_class.Get(), hash);
   if (existing == nullptr) {
+    jit::Jit::NewTypeLoadedIfUsingJit(new_class.Get());
     return new_class.Get();
   }
   // Another thread must have loaded the class after we
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 2184f0a..3df9101 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -117,14 +117,15 @@
 
 static bool unstarted_initialized_ = false;
 
-CommonRuntimeTest::CommonRuntimeTest() {}
-CommonRuntimeTest::~CommonRuntimeTest() {
+CommonRuntimeTestImpl::CommonRuntimeTestImpl() {}
+
+CommonRuntimeTestImpl::~CommonRuntimeTestImpl() {
   // Ensure the dex files are cleaned up before the runtime.
   loaded_dex_files_.clear();
   runtime_.reset();
 }
 
-void CommonRuntimeTest::SetUpAndroidRoot() {
+void CommonRuntimeTestImpl::SetUpAndroidRoot() {
   if (IsHost()) {
     // $ANDROID_ROOT is set on the device, but not necessarily on the host.
     // But it needs to be set so that icu4c can find its locale data.
@@ -166,7 +167,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUpAndroidData(std::string& android_data) {
+void CommonRuntimeTestImpl::SetUpAndroidData(std::string& android_data) {
   // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache
   if (IsHost()) {
     const char* tmpdir = getenv("TMPDIR");
@@ -185,7 +186,8 @@
   setenv("ANDROID_DATA", android_data.c_str(), 1);
 }
 
-void CommonRuntimeTest::TearDownAndroidData(const std::string& android_data, bool fail_on_error) {
+void CommonRuntimeTestImpl::TearDownAndroidData(const std::string& android_data,
+                                                bool fail_on_error) {
   if (fail_on_error) {
     ASSERT_EQ(rmdir(android_data.c_str()), 0);
   } else {
@@ -230,18 +232,18 @@
   }
 
   if (founddir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory.";
+    ADD_FAILURE() << "Cannot find Android tools directory.";
   }
   return founddir;
 }
 
-std::string CommonRuntimeTest::GetAndroidHostToolsDir() {
+std::string CommonRuntimeTestImpl::GetAndroidHostToolsDir() {
   return GetAndroidToolsDir("prebuilts/gcc/linux-x86/host",
                             "x86_64-linux-glibc2.15",
                             "x86_64-linux");
 }
 
-std::string CommonRuntimeTest::GetAndroidTargetToolsDir(InstructionSet isa) {
+std::string CommonRuntimeTestImpl::GetAndroidTargetToolsDir(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -269,15 +271,16 @@
   return "";
 }
 
-std::string CommonRuntimeTest::GetCoreArtLocation() {
+std::string CommonRuntimeTestImpl::GetCoreArtLocation() {
   return GetCoreFileLocation("art");
 }
 
-std::string CommonRuntimeTest::GetCoreOatLocation() {
+std::string CommonRuntimeTestImpl::GetCoreOatLocation() {
   return GetCoreFileLocation("oat");
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::LoadExpectSingleDexFile(
+    const char* location) {
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   MemMap::Init();
@@ -290,7 +293,7 @@
   }
 }
 
-void CommonRuntimeTest::SetUp() {
+void CommonRuntimeTestImpl::SetUp() {
   SetUpAndroidRoot();
   SetUpAndroidData(android_data_);
   dalvik_cache_.append(android_data_.c_str());
@@ -345,7 +348,7 @@
   FinalizeSetup();
 }
 
-void CommonRuntimeTest::FinalizeSetup() {
+void CommonRuntimeTestImpl::FinalizeSetup() {
   // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
   // set up.
   if (!unstarted_initialized_) {
@@ -369,7 +372,7 @@
   runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
 }
 
-void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
+void CommonRuntimeTestImpl::ClearDirectory(const char* dirpath) {
   ASSERT_TRUE(dirpath != nullptr);
   DIR* dir = opendir(dirpath);
   ASSERT_TRUE(dir != nullptr);
@@ -396,7 +399,7 @@
   closedir(dir);
 }
 
-void CommonRuntimeTest::TearDown() {
+void CommonRuntimeTestImpl::TearDown() {
   const char* android_data = getenv("ANDROID_DATA");
   ASSERT_TRUE(android_data != nullptr);
   ClearDirectory(dalvik_cache_.c_str());
@@ -453,12 +456,12 @@
   return StringPrintf("%s/framework/%s%s.jar", path.c_str(), jar_prefix.c_str(), suffix.c_str());
 }
 
-std::vector<std::string> CommonRuntimeTest::GetLibCoreDexFileNames() {
+std::vector<std::string> CommonRuntimeTestImpl::GetLibCoreDexFileNames() {
   return std::vector<std::string>({GetDexFileName("core-oj", IsHost()),
                                    GetDexFileName("core-libart", IsHost())});
 }
 
-std::string CommonRuntimeTest::GetTestAndroidRoot() {
+std::string CommonRuntimeTestImpl::GetTestAndroidRoot() {
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != nullptr);
@@ -478,7 +481,7 @@
 #define ART_TARGET_NATIVETEST_DIR_STRING ""
 #endif
 
-std::string CommonRuntimeTest::GetTestDexFileName(const char* name) {
+std::string CommonRuntimeTestImpl::GetTestDexFileName(const char* name) {
   CHECK(name != nullptr);
   std::string filename;
   if (IsHost()) {
@@ -493,7 +496,8 @@
   return filename;
 }
 
-std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
+std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTestImpl::OpenTestDexFiles(
+    const char* name) {
   std::string filename = GetTestDexFileName(name);
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
@@ -506,13 +510,13 @@
   return dex_files;
 }
 
-std::unique_ptr<const DexFile> CommonRuntimeTest::OpenTestDexFile(const char* name) {
+std::unique_ptr<const DexFile> CommonRuntimeTestImpl::OpenTestDexFile(const char* name) {
   std::vector<std::unique_ptr<const DexFile>> vector = OpenTestDexFiles(name);
   EXPECT_EQ(1U, vector.size());
   return std::move(vector[0]);
 }
 
-std::vector<const DexFile*> CommonRuntimeTest::GetDexFiles(jobject jclass_loader) {
+std::vector<const DexFile*> CommonRuntimeTestImpl::GetDexFiles(jobject jclass_loader) {
   std::vector<const DexFile*> ret;
 
   ScopedObjectAccess soa(Thread::Current());
@@ -572,7 +576,7 @@
   return ret;
 }
 
-const DexFile* CommonRuntimeTest::GetFirstDexFile(jobject jclass_loader) {
+const DexFile* CommonRuntimeTestImpl::GetFirstDexFile(jobject jclass_loader) {
   std::vector<const DexFile*> tmp(GetDexFiles(jclass_loader));
   DCHECK(!tmp.empty());
   const DexFile* ret = tmp[0];
@@ -580,7 +584,7 @@
   return ret;
 }
 
-jobject CommonRuntimeTest::LoadDex(const char* dex_name) {
+jobject CommonRuntimeTestImpl::LoadDex(const char* dex_name) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
   std::vector<const DexFile*> class_path;
   CHECK_NE(0U, dex_files.size());
@@ -596,7 +600,7 @@
   return class_loader;
 }
 
-std::string CommonRuntimeTest::GetCoreFileLocation(const char* suffix) {
+std::string CommonRuntimeTestImpl::GetCoreFileLocation(const char* suffix) {
   CHECK(suffix != nullptr);
 
   std::string location;
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 7223b6e..0ce40e8 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -64,8 +64,10 @@
   std::unique_ptr<File> file_;
 };
 
-class CommonRuntimeTest : public testing::Test {
+class CommonRuntimeTestImpl {
  public:
+  CommonRuntimeTestImpl();
+  virtual ~CommonRuntimeTestImpl();
   static void SetUpAndroidRoot();
 
   // Note: setting up ANDROID_DATA may create a temporary directory. If this is used in a
@@ -74,19 +76,25 @@
 
   static void TearDownAndroidData(const std::string& android_data, bool fail_on_error);
 
-  CommonRuntimeTest();
-  ~CommonRuntimeTest();
-
   // Gets the paths of the libcore dex files.
   static std::vector<std::string> GetLibCoreDexFileNames();
 
   // Returns bin directory which contains host's prebuild tools.
   static std::string GetAndroidHostToolsDir();
 
-  // Returns bin directory which contains target's prebuild tools.
+  // Returns bin directory which contains target's prebuild tools.
   static std::string GetAndroidTargetToolsDir(InstructionSet isa);
 
  protected:
+  // Allow subclasses such as CommonCompilerTest to add extra options.
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
+
+  // Called before the runtime is created.
+  virtual void PreRuntimeCreate() {}
+
+  // Called after the runtime is created.
+  virtual void PostRuntimeCreate() {}
+
   static bool IsHost() {
     return !kIsTargetBuild;
   }
@@ -99,25 +107,8 @@
 
   std::unique_ptr<const DexFile> LoadExpectSingleDexFile(const char* location);
 
-  virtual void SetUp();
-
-  // Allow subclases such as CommonCompilerTest to add extra options.
-  virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
-
   void ClearDirectory(const char* dirpath);
 
-  virtual void TearDown();
-
-  // Called before the runtime is created.
-  virtual void PreRuntimeCreate() {}
-
-  // Called after the runtime is created.
-  virtual void PostRuntimeCreate() {}
-
-  // Called to finish up runtime creation and filling test fields. By default runs root
-  // initializers, initialize well-known classes, and creates the heap thread pool.
-  virtual void FinalizeSetup();
-
   std::string GetTestAndroidRoot();
 
   std::string GetTestDexFileName(const char* name);
@@ -150,12 +141,45 @@
 
   std::unique_ptr<CompilerCallbacks> callbacks_;
 
+  void SetUp();
+
+  void TearDown();
+
+  void FinalizeSetup();
+
  private:
   static std::string GetCoreFileLocation(const char* suffix);
 
   std::vector<std::unique_ptr<const DexFile>> loaded_dex_files_;
 };
 
+template <typename TestType>
+class CommonRuntimeTestBase : public TestType, public CommonRuntimeTestImpl {
+ public:
+  CommonRuntimeTestBase() {}
+  virtual ~CommonRuntimeTestBase() {}
+
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTestImpl::SetUp();
+  }
+
+  virtual void TearDown() {
+    CommonRuntimeTestImpl::TearDown();
+  }
+
+  // Called to finish up runtime creation and filling test fields. By default runs root
+  // initializers, initialize well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup() {
+    CommonRuntimeTestImpl::FinalizeSetup();
+  }
+};
+
+using CommonRuntimeTest = CommonRuntimeTestBase<testing::Test>;
+
+template <typename Param>
+using CommonRuntimeTestWithParam = CommonRuntimeTestBase<testing::TestWithParam<Param>>;
+
 // Sets a CheckJni abort hook to catch failures. Note that this will cause CheckJNI to carry on
 // rather than aborting, so be careful!
 class CheckJniAbortCatcher {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6e11cf8..a0f875d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -230,11 +230,11 @@
     Dbg::PostException(exception_object);
   }
 
-  // We only care about how many backward branches were executed in the Jit.
-  void BackwardBranch(Thread* /*thread*/, ArtMethod* method, int32_t dex_pc_offset)
+  // We only care about branches in the Jit.
+  void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected backward branch event in debugger " << PrettyMethod(method)
-               << " " << dex_pc_offset;
+    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+               << " " << dex_pc << ", " << dex_pc_offset;
   }
 
   // We only care about invokes in the Jit.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index bc8ba97..9b93c13 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -687,8 +687,8 @@
   return nullptr;
 }
 
-void DexFile::CreateTypeLookupTable() const {
-  lookup_table_.reset(TypeLookupTable::Create(*this));
+void DexFile::CreateTypeLookupTable(uint8_t* storage) const {
+  lookup_table_.reset(TypeLookupTable::Create(*this, storage));
 }
 
 // Given a signature place the type ids into the given vector
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8a3db6c..968b37b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1094,11 +1094,11 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
                             DexDebugNewLocalCb local_cb, void* context) const;
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
                                void* context) const;
 
@@ -1157,7 +1157,7 @@
     return lookup_table_.get();
   }
 
-  void CreateTypeLookupTable() const;
+  void CreateTypeLookupTable(uint8_t* storage = nullptr) const;
 
  private:
   // Opens a .dex file
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 272249c..b67af53 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -686,31 +686,6 @@
 // Set of dex files for interface method tests. As it's not as easy to mutate method names, it's
 // just easier to break up bad cases.
 
-// Interface with an instance constructor.
-//
-// .class public interface LInterfaceMethodFlags;
-// .super Ljava/lang/Object;
-//
-// .method public static constructor <clinit>()V
-// .registers 1
-//     return-void
-// .end method
-//
-// .method public constructor <init>()V
-// .registers 1
-//     return-void
-// .end method
-static const char kMethodFlagsInterfaceWithInit[] =
-    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
-    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
-    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
-    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
-    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
-    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
-    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
-    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
-    "ABAAAAEAAABMAQAA";
-
 // Standard interface. Use declared-synchronized again for 3B encoding.
 //
 // .class public interface LInterfaceMethodFlags;
@@ -751,13 +726,6 @@
 }
 
 TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
-  // Reject interface with <init>.
-  VerifyModification(
-      kMethodFlagsInterfaceWithInit,
-      "method_flags_interface_with_init",
-      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
-      "Non-clinit interface method 1 should not have code");
-
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_ok",
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 9a9f42b..0663b7e 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -193,10 +193,10 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
+    // Pass in false since the object cannot be finalizable.
     return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -207,7 +207,7 @@
                                                       Thread* self,
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -410,10 +410,19 @@
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
   } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-    return nullptr;  // Failure.
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an uninitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     bool can_access_resolved_method =
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index ce6467a..7727b2d 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -180,7 +180,7 @@
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99e98bb..2784693 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -227,7 +227,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 416510d..c8e913c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,7 +34,7 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
-  // Mark compact colector.
+  // Mark compact collector.
   kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e7ea983..7b531ba 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -89,7 +89,6 @@
   class RegionSpace;
   class RosAllocSpace;
   class Space;
-  class SpaceTest;
   class ZygoteSpace;
 }  // namespace space
 
@@ -1335,7 +1334,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class space::SpaceTest;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 39ba743..5e7f1a2 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -86,7 +86,7 @@
     // it to the mutator as long as the GC is not preserving references.
     if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
deleted file mode 100644
index 93fe155..0000000
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-#include "dlmalloc_space.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 2798b21..e70fe21 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -521,7 +521,7 @@
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(GetAddressForAllocationInfo(new_info));
-  // We always put our object at the start of the free block, there can not be another free block
+  // We always put our object at the start of the free block, there cannot be another free block
   // before it.
   if (kIsDebugBuild) {
     mprotect(obj, allocation_size, PROT_READ | PROT_WRITE);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 05b484a..ad38724 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -22,7 +22,7 @@
 namespace gc {
 namespace space {
 
-class LargeObjectSpaceTest : public SpaceTest {
+class LargeObjectSpaceTest : public SpaceTest<CommonRuntimeTest> {
  public:
   void LargeObjectTest();
 
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
deleted file mode 100644
index 0c5be03..0000000
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/space_create_test.cc b/runtime/gc/space/space_create_test.cc
new file mode 100644
index 0000000..aea2d9f
--- /dev/null
+++ b/runtime/gc/space/space_create_test.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "space_test.h"
+
+#include "dlmalloc_space.h"
+#include "rosalloc_space.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+enum MallocSpaceType {  // Test parameter: which malloc space CreateSpace() builds.
+  kMallocSpaceDlMalloc,  // Selects DlMallocSpace::Create.
+  kMallocSpaceRosAlloc,  // Selects RosAllocSpace::Create.
+};
+
+class SpaceCreateTest : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {
+ public:
+  // Creates the malloc space flavor named by GetParam(), forwarding the size arguments.
+  MallocSpace* CreateSpace(const std::string& name,
+                           size_t initial_size,
+                           size_t growth_limit,
+                           size_t capacity,
+                           uint8_t* requested_begin) {
+    if (GetParam() != kMallocSpaceDlMalloc) {
+      DCHECK_EQ(static_cast<uint32_t>(GetParam()), static_cast<uint32_t>(kMallocSpaceRosAlloc));
+      return RosAllocSpace::Create(name,
+                                   initial_size,
+                                   growth_limit,
+                                   capacity,
+                                   requested_begin,
+                                   Runtime::Current()->GetHeap()->IsLowMemoryMode(),
+                                   false);
+    }
+    return DlMallocSpace::Create(name,
+                                 initial_size,
+                                 growth_limit,
+                                 capacity,
+                                 requested_begin,
+                                 false);
+  }
+};
+
+TEST_P(SpaceCreateTest, InitTestBody) {
+  // The rejected configurations below produce error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  // CreateSpace takes (name, initial_size, growth_limit, capacity, requested_begin). In the
+  // labels below, "init" is initial_size, "growth" is growth_limit and "max" is capacity.
+  // Init < max == growth
+  std::unique_ptr<Space> space(CreateSpace("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+  EXPECT_TRUE(space != nullptr);
+  // Init == max == growth
+  space.reset(CreateSpace("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+  EXPECT_TRUE(space != nullptr);
+  // Init > max == growth
+  space.reset(CreateSpace("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+  EXPECT_TRUE(space == nullptr);
+  // Growth == init < max
+  space.reset(CreateSpace("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+  EXPECT_TRUE(space != nullptr);
+  // Growth < init < max
+  space.reset(CreateSpace("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+  EXPECT_TRUE(space == nullptr);
+  // Init < growth < max
+  space.reset(CreateSpace("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+  EXPECT_TRUE(space != nullptr);
+  // Init < max < growth
+  space.reset(CreateSpace("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+  EXPECT_TRUE(space == nullptr);
+}
+
+// Exercises alloc/free on a malloc space, then splits it via CreateZygoteSpace and allocates
+// again. Size out-parameters start at zero so a failed allocation is reported deterministically.
+// TODO: Improve this test: allocate more before the ZygoteSpace is created, allocate again after
+// it is created, and run some GCs to ensure that the GC works with the ZygoteSpace.
+TEST_P(SpaceCreateTest, ZygoteSpaceTestBody) {
+  size_t dummy = 0;  // Sink for out-parameters whose values are not checked.
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated = 0, ptr1_usable_size = 0, ptr1_bytes_tl_bulk_allocated = 0;
+  StackHandleScope<3> hs(soa.Self());  // Slots for ptr1, ptr3 and ptr6.
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated = 0, ptr3_usable_size = 0, ptr3_bytes_tl_bulk_allocated = 0;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated = 0, ptr6_usable_size = 0, ptr6_bytes_tl_bulk_allocated = 0;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+
+  // Make sure that the zygote space isn't directly at the start of the space.
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  heap->RevokeAllThreadLocalBuffers();
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);  // Updates `space`.
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space, false);
+
+  // Make the new `space` findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space, false);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  ptr1.Assign(Alloc(space,
+                    self,
+                    1 * MB,
+                    &ptr1_bytes_allocated,
+                    &ptr1_usable_size,
+                    &ptr1_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  ptr3.Assign(AllocWithGrowth(space,
+                              self,
+                              2 * MB,
+                              &ptr3_bytes_allocated,
+                              &ptr3_usable_size,
+                              &ptr3_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+
+  // Final clean up.
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeTestBody) {
+  size_t dummy = 0;  // Sink for out-parameters whose values are not checked.
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+
+  // Succeeds, fits without adjusting the footprint limit. Out-params are pre-zeroed.
+  size_t ptr1_bytes_allocated = 0, ptr1_usable_size = 0, ptr1_bytes_tl_bulk_allocated = 0;
+  StackHandleScope<3> hs(soa.Self());  // Slots for ptr1, ptr3 and ptr6.
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated = 0, ptr3_usable_size = 0, ptr3_bytes_tl_bulk_allocated = 0;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated = 0, ptr6_usable_size = 0, ptr6_bytes_tl_bulk_allocated = 0;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeListTestBody) {
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make the space findable to the heap; it is also deleted when the runtime is cleaned up.
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the max allowed footprint.
+  mirror::Object* lots_of_objects[1024];
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = Alloc(space,
+                               self,
+                               size_of_zero_length_byte_array,
+                               &allocation_size,
+                               &usable_size,
+                               &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release every object from the first pass with a single FreeList call.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+
+  // Succeeds, fits by adjusting the max allowed footprint.
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    lots_of_objects[i] = AllocWithGrowth(space,
+                                         self,
+                                         1024,  // Bytes requested per object.
+                                         &allocation_size,
+                                         &usable_size,
+                                         &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release every object from the second pass with a single FreeList call.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+}
+
+INSTANTIATE_TEST_CASE_P(CreateRosAllocSpace,  // Instantiation name (test name prefix).
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceRosAlloc));  // GetParam() value.
+INSTANTIATE_TEST_CASE_P(CreateDlMallocSpace,  // Instantiation name (test name prefix).
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceDlMalloc));  // GetParam() value.
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 4d2db11..e588eb3 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -33,12 +33,10 @@
 namespace gc {
 namespace space {
 
-class SpaceTest : public CommonRuntimeTest {
+template <class Super>
+class SpaceTest : public Super {
  public:
-  jobject byte_array_class_;
-
-  SpaceTest() : byte_array_class_(nullptr) {
-  }
+  jobject byte_array_class_ = nullptr;
 
   void AddSpace(ContinuousSpace* space, bool revoke = true) {
     Heap* heap = Runtime::Current()->GetHeap();
@@ -62,13 +60,19 @@
     return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
   }
 
-  mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                        size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* Alloc(space::MallocSpace* alloc_space,
+                        Thread* self,
+                        size_t bytes,
+                        size_t* bytes_allocated,
+                        size_t* usable_size,
                         size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
-    mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size,
+    mirror::Object* obj = alloc_space->Alloc(self,
+                                             bytes,
+                                             bytes_allocated,
+                                             usable_size,
                                              bytes_tl_bulk_allocated);
     if (obj != nullptr) {
       InstallClass(obj, byte_array_class.Get(), bytes);
@@ -76,8 +80,11 @@
     return obj;
   }
 
-  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                                  size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space,
+                                  Thread* self,
+                                  size_t bytes,
+                                  size_t* bytes_allocated,
+                                  size_t* usable_size,
                                   size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
@@ -117,10 +124,6 @@
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
                                         size_t capacity, uint8_t* requested_begin);
-  void InitTestBody(CreateSpaceFn create_space);
-  void ZygoteSpaceTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeListTestBody(CreateSpaceFn create_space);
 
   void SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
                                            int round, size_t growth_limit);
@@ -132,278 +135,11 @@
   return *seed;
 }
 
-void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
-  // This will lead to error messages in the log.
-  ScopedLogSeverity sls(LogSeverity::FATAL);
-
-  {
-    // Init < max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init == max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init > max == growth
-    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Growth == init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Growth < init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Init < growth < max
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init < max < growth
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-}
-
-// TODO: This test is not very good, we should improve it.
-// The test should do more allocations before the creation of the ZygoteSpace, and then do
-// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
-// the GC works with the ZygoteSpace.
-void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-
-  // Make sure that the zygote space isn't directly at the start of the space.
-  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  space::Space* old_space = space;
-  heap->RemoveSpace(old_space);
-  heap->RevokeAllThreadLocalBuffers();
-  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
-                                                              heap->IsLowMemoryMode(),
-                                                              &space);
-  delete old_space;
-  // Add the zygote space.
-  AddSpace(zygote_space, false);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space, false);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                    &ptr1_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                              &ptr3_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(2U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-
-  // Final clean up.
-  free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the max allowed footprint.
-  mirror::Object* lots_of_objects[1024];
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
-                               &usable_size, &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-
-  // Succeeds, fits by adjusting the max allowed footprint.
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size,
-                                         &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-}
-
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
-                                                    int round, size_t growth_limit) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space,
+                                                           intptr_t object_size,
+                                                           int round,
+                                                           size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
       ((object_size < 0 && -object_size >= static_cast<intptr_t>(growth_limit)))) {
     // No allocation can succeed
@@ -576,7 +312,9 @@
   EXPECT_LE(space->Size(), growth_limit);
 }
 
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size,
+                                                             CreateSpaceFn create_space) {
   if (object_size < SizeOfZeroLengthByteArray()) {
     // Too small for the object layout/model.
     return;
@@ -614,25 +352,8 @@
     SizeFootPrintGrowthLimitAndTrimDriver(-size, spaceFn); \
   }
 
-#define TEST_SPACE_CREATE_FN_BASE(spaceName, spaceFn) \
-  class spaceName##BaseTest : public SpaceTest { \
-  }; \
-  \
-  TEST_F(spaceName##BaseTest, Init) { \
-    InitTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, ZygoteSpace) { \
-    ZygoteSpaceTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFree) { \
-    AllocAndFreeTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFreeList) { \
-    AllocAndFreeListTestBody(spaceFn); \
-  }
-
 #define TEST_SPACE_CREATE_FN_STATIC(spaceName, spaceFn) \
-  class spaceName##StaticTest : public SpaceTest { \
+  class spaceName##StaticTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(12B, spaceName, spaceFn, 12) \
@@ -648,7 +369,7 @@
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(8MB, spaceName, spaceFn, 8 * MB)
 
 #define TEST_SPACE_CREATE_FN_RANDOM(spaceName, spaceFn) \
-  class spaceName##RandomTest : public SpaceTest { \
+  class spaceName##RandomTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimRandom(16B, spaceName, spaceFn, 16) \
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index dfc1f5f..bb35ec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -419,18 +419,13 @@
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
         fd_(fd),
-        direct_to_ddms_(direct_to_ddms),
-        start_ns_(NanoTime()),
-        output_(nullptr),
-        current_heap_(HPROF_HEAP_DEFAULT),
-        objects_in_segment_(0),
-        next_string_id_(0x400000),
-        next_class_serial_number_(1) {
+        direct_to_ddms_(direct_to_ddms) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
-    REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
+    REQUIRES(Locks::mutator_lock_)
+    REQUIRES(!Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
     {
       MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
       if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
@@ -462,10 +457,11 @@
     }
 
     if (okay) {
-      uint64_t duration = NanoTime() - start_ns_;
-      LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(RoundUp(overall_size, 1024))
-          << ") in " << PrettyDuration(duration);
+      const uint64_t duration = NanoTime() - start_ns_;
+      LOG(INFO) << "hprof: heap dump completed (" << PrettySize(RoundUp(overall_size, KB))
+                << ") in " << PrettyDuration(duration)
+                << " objects " << total_objects_
+                << " objects with stack traces " << total_objects_with_stack_trace_;
     }
   }
 
@@ -855,7 +851,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
-    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
+    total_objects_with_stack_trace_ = count;
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -865,16 +861,19 @@
   int fd_;
   bool direct_to_ddms_;
 
-  uint64_t start_ns_;
+  uint64_t start_ns_ = NanoTime();
 
-  EndianOutput* output_;
+  EndianOutput* output_ = nullptr;
 
-  HprofHeapId current_heap_;  // Which heap we're currently dumping.
-  size_t objects_in_segment_;
+  HprofHeapId current_heap_ = HPROF_HEAP_DEFAULT;  // Which heap we're currently dumping.
+  size_t objects_in_segment_ = 0;
 
-  HprofStringId next_string_id_;
+  size_t total_objects_ = 0u;
+  size_t total_objects_with_stack_trace_ = 0u;
+
+  HprofStringId next_string_id_ = 0x400000;
   SafeMap<std::string, HprofStringId> strings_;
-  HprofClassSerialNumber next_class_serial_number_;
+  HprofClassSerialNumber next_class_serial_number_ = 1;
   SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
 
   std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
@@ -1064,6 +1063,8 @@
     return;
   }
 
+  ++total_objects_;
+
   GcRootVisitor visitor(this);
   obj->VisitReferences(visitor, VoidFunctor());
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 7d60264..c57b1bb 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -78,7 +78,7 @@
       have_field_read_listeners_(false),
       have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
-      have_backward_branch_listeners_(false),
+      have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -431,11 +431,11 @@
                            method_unwind_listeners_,
                            listener,
                            &have_method_unwind_listeners_);
-  PotentiallyAddListenerTo(kBackwardBranch,
+  PotentiallyAddListenerTo(kBranch,
                            events,
-                           backward_branch_listeners_,
+                           branch_listeners_,
                            listener,
-                           &have_backward_branch_listeners_);
+                           &have_branch_listeners_);
   PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
                            events,
                            invoke_virtual_or_interface_listeners_,
@@ -508,11 +508,11 @@
                                 method_unwind_listeners_,
                                 listener,
                                 &have_method_unwind_listeners_);
-  PotentiallyRemoveListenerFrom(kBackwardBranch,
+  PotentiallyRemoveListenerFrom(kBranch,
                                 events,
-                                backward_branch_listeners_,
+                                branch_listeners_,
                                 listener,
-                                &have_backward_branch_listeners_);
+                                &have_branch_listeners_);
   PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
                                 events,
                                 invoke_virtual_or_interface_listeners_,
@@ -917,11 +917,13 @@
   }
 }
 
-void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
-                                         int32_t offset) const {
-  for (InstrumentationListener* listener : backward_branch_listeners_) {
+void Instrumentation::BranchImpl(Thread* thread,
+                                 ArtMethod* method,
+                                 uint32_t dex_pc,
+                                 int32_t offset) const {
+  for (InstrumentationListener* listener : branch_listeners_) {
     if (listener != nullptr) {
-      listener->BackwardBranch(thread, method, offset);
+      listener->Branch(thread, method, dex_pc, offset);
     }
   }
 }
@@ -931,7 +933,7 @@
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
-  // We can not have thread suspension since that would cause the this_object parameter to
+  // We cannot have thread suspension since that would cause the this_object parameter to
   // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
   ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index b29245f..56aeefc 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -94,8 +94,11 @@
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
-  // Call-back for when we get a backward branch.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  // Call-back for when we execute a branch.
+  virtual void Branch(Thread* thread,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
@@ -122,7 +125,7 @@
     kFieldRead = 0x10,
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
-    kBackwardBranch = 0x80,
+    kBranch = 0x80,
     kInvokeVirtualOrInterface = 0x100,
   };
 
@@ -276,8 +279,8 @@
     return have_exception_caught_listeners_;
   }
 
-  bool HasBackwardBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_backward_branch_listeners_;
+  bool HasBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_branch_listeners_;
   }
 
   bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -324,11 +327,11 @@
     }
   }
 
-  // Inform listeners that a backward branch has been taken (only supported by the interpreter).
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t offset) const
+  // Inform listeners that a branch has been taken (only supported by the interpreter).
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (UNLIKELY(HasBackwardBranchListeners())) {
-      BackwardBranchImpl(thread, method, offset);
+    if (UNLIKELY(HasBranchListeners())) {
+      BranchImpl(thread, method, dex_pc, offset);
     }
   }
 
@@ -442,7 +445,7 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
+  void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
                                     mirror::Object* this_object,
@@ -513,8 +516,8 @@
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
-  bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  // Do we have any branch listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -537,7 +540,7 @@
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
-  std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index e4688a2..56e3bc5 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -37,7 +37,7 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_exception_caught_event(false), received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
@@ -100,11 +100,12 @@
     received_exception_caught_event = true;
   }
 
-  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
-                      ArtMethod* method ATTRIBUTE_UNUSED,
-                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+  void Branch(Thread* thread ATTRIBUTE_UNUSED,
+              ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    received_backward_branch_event = true;
+    received_branch_event = true;
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
@@ -124,7 +125,7 @@
     received_field_read_event = false;
     received_field_written_event = false;
     received_exception_caught_event = false;
-    received_backward_branch_event = false;
+    received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
   }
 
@@ -135,7 +136,7 @@
   bool received_field_read_event;
   bool received_field_written_event;
   bool received_exception_caught_event;
-  bool received_backward_branch_event;
+  bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
 
  private:
@@ -305,8 +306,8 @@
         return instr->HasFieldWriteListeners();
       case instrumentation::Instrumentation::kExceptionCaught:
         return instr->HasExceptionCaughtListeners();
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kBranch:
+        return instr->HasBranchListeners();
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
@@ -349,8 +350,8 @@
         self->ClearException();
         break;
       }
-      case instrumentation::Instrumentation::kBackwardBranch:
-        instr->BackwardBranch(self, method, dex_pc);
+      case instrumentation::Instrumentation::kBranch:
+        instr->Branch(self, method, dex_pc, -1);
         break;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
@@ -378,8 +379,8 @@
         return listener.received_field_written_event;
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kBranch:
+        return listener.received_branch_event;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return listener.received_invoke_virtual_or_interface_event;
       default:
@@ -441,8 +442,8 @@
   TestEvent(instrumentation::Instrumentation::kExceptionCaught);
 }
 
-TEST_F(InstrumentationTest, BackwardBranchEvent) {
-  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+TEST_F(InstrumentationTest, BranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBranch);
 }
 
 TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 8f715a3..2b2176e 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -61,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
-  // can not be running while we are doing image writing. Maybe be called while while holding a
+  // cannot be running while we are doing image writing. May be called while holding a
   // lock since there will not be thread suspension.
   mirror::String* InternStrongImageString(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index e7b4731..6b5218d 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -239,7 +239,7 @@
 }
 
 #if !defined(__clang__)
-#if defined(__arm__) && !defined(ART_USE_READ_BARRIER)
+#if defined(__arm__)
 // TODO: remove when all targets implemented.
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
@@ -247,7 +247,7 @@
 #endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-#if defined(__arm__) && !defined(ART_USE_READ_BARRIER)
+#if defined(__arm__)
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 18fb0d8..ecd4de9 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -592,6 +592,10 @@
   //
   // (at this point the ArtMethod has already been replaced,
   // so we just need to fix-up the arguments)
+  //
+  // Note that FindMethodFromCode in entrypoint_utils-inl.h was also special-cased
+  // to handle the compiler optimization of replacing `this` with null without
+  // throwing NullPointerException.
   uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
   if (UNLIKELY(string_init)) {
     DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9766299..ca00621 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -63,10 +63,10 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -633,8 +633,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO) {
     int8_t offset = inst->VRegA_10t(inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -646,8 +646,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_16) {
     int16_t offset = inst->VRegA_20t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -659,8 +659,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_32) {
     int32_t offset = inst->VRegA_30t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -672,8 +672,8 @@
 
   HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -685,8 +685,8 @@
 
   HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -788,8 +788,8 @@
   HANDLE_INSTRUCTION_START(IF_EQ) {
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -806,8 +806,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -824,8 +824,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,8 +842,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -860,8 +860,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
     shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -878,8 +878,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -895,8 +895,8 @@
   HANDLE_INSTRUCTION_START(IF_EQZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -912,8 +912,8 @@
   HANDLE_INSTRUCTION_START(IF_NEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -929,8 +929,8 @@
   HANDLE_INSTRUCTION_START(IF_LTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -946,8 +946,8 @@
   HANDLE_INSTRUCTION_START(IF_GEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -963,8 +963,8 @@
   HANDLE_INSTRUCTION_START(IF_GTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -980,8 +980,8 @@
   HANDLE_INSTRUCTION_START(IF_LEZ)  {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bab0d40..c3b75b2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -69,9 +69,9 @@
     }                                                                                           \
   } while (false)
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
@@ -565,8 +565,8 @@
       case Instruction::GOTO: {
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -575,8 +575,8 @@
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -585,8 +585,8 @@
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -595,8 +595,8 @@
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -605,8 +605,8 @@
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -709,8 +709,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) ==
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,8 +724,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -739,8 +739,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -754,8 +754,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -769,8 +769,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -784,8 +784,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -798,8 +798,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -812,8 +812,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -826,8 +826,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -840,8 +840,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -854,8 +854,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -868,8 +868,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 4fb6df1..1bc58ac 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -276,7 +276,12 @@
    */
 #if defined(__linux__)
   hostent he;
-  char auxBuf[128];
+  // The size of the work buffer used in the gethostbyname_r call
+  // below. It used to be 128, but this was not enough on some
+  // configurations (maybe because of IPv6?), causing failures in JDWP
+  // host testing; thus it was increased to 256.
+  static constexpr size_t kAuxBufSize = 256;
+  char auxBuf[kAuxBufSize];
   int error;
   int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
   if (cc != 0) {
@@ -298,7 +303,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +319,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +446,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +488,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 05668a9..8f4d24f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -56,7 +56,8 @@
   os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
      << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
      << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
-     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n";
+     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n"
+     << "JIT total number of compilations=" << code_cache_->NumberOfCompilations() << "\n";
   cumulative_timings_.Dump(os);
 }
 
@@ -127,6 +128,13 @@
     *error_msg = "JIT couldn't find jit_compile_method entry point";
     return false;
   }
+  jit_types_loaded_ = reinterpret_cast<void (*)(void*, mirror::Class**, size_t)>(
+      dlsym(jit_library_handle_, "jit_types_loaded"));
+  if (jit_types_loaded_ == nullptr) {
+    dlclose(jit_library_handle_);
+    *error_msg = "JIT couldn't find jit_types_loaded entry point";
+    return false;
+  }
   CompilerCallbacks* callbacks = nullptr;
   bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
@@ -214,5 +222,31 @@
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
 }
 
+void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->generate_debug_info_) {
+    DCHECK(jit->jit_types_loaded_ != nullptr);
+    jit->jit_types_loaded_(jit->jit_compiler_handle_, &type, 1);
+  }
+}
+
+void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
+  struct CollectClasses : public ClassVisitor {
+    bool Visit(mirror::Class* klass) override {
+      classes_.push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*> classes_;
+  };
+
+  if (generate_debug_info_) {
+    ScopedObjectAccess so(Thread::Current());
+
+    CollectClasses visitor;
+    linker->VisitClasses(&visitor);
+    jit_types_loaded_(jit_compiler_handle_, visitor.classes_.data(), visitor.classes_.size());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 42bbbe7..429edf6 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -79,6 +79,13 @@
     DumpInfo(os);
   }
 
+  static void NewTypeLoadedIfUsingJit(mirror::Class* type)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // If debug info generation is turned on then write the type information for types already loaded
+  // into the specified class linker to the jit debug interface.
+  void DumpTypeInfoForLoadedTypes(ClassLinker* linker);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -89,6 +96,7 @@
   void* (*jit_load_)(CompilerCallbacks**, bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+  void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
   bool dump_info_on_shutdown_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2d575bd..64b2c89 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -125,7 +125,8 @@
       data_end_(initial_data_capacity),
       has_done_one_collection_(false),
       last_update_time_ns_(0),
-      garbage_collect_code_(garbage_collect_code) {
+      garbage_collect_code_(garbage_collect_code),
+      number_of_compilations_(0) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
@@ -322,6 +323,7 @@
 
     __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
                             reinterpret_cast<char*>(code_ptr + code_size));
+    number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
@@ -347,6 +349,11 @@
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
+size_t JitCodeCache::NumberOfCompilations() {
+  MutexLock mu(Thread::Current(), lock_);
+  return number_of_compilations_;
+}
+
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
   return CodeCacheSizeLocked();
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index a152bcd..67fa928 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -68,6 +68,9 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t NumberOfCompilations() REQUIRES(!lock_);
+
   bool NotifyCompilationOf(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -261,6 +264,9 @@
   // Whether we can do garbage collection.
   const bool garbage_collect_code_;
 
+  // Number of compilations done throughout the lifetime of the JIT.
+  size_t number_of_compilations_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 4cbaf2c..6b47b67 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -165,11 +165,14 @@
   instrumentation_cache_->AddSamples(thread, method, 1);
 }
 
-void JitInstrumentationListener::BackwardBranch(Thread* thread,
-                                                ArtMethod* method,
-                                                int32_t dex_pc_offset) {
-  CHECK_LE(dex_pc_offset, 0);
-  instrumentation_cache_->AddSamples(thread, method, 1);
+void JitInstrumentationListener::Branch(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                        int32_t dex_pc_offset) {
+  if (dex_pc_offset < 0) {
+    // Increment method hotness if it is a backward branch.
+    instrumentation_cache_->AddSamples(thread, method, 1);
+  }
 }
 
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 15969e4..620c087 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -70,7 +70,7 @@
   void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
@@ -84,7 +84,7 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index a132701..b4b872f 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -24,8 +24,11 @@
 
 #include "art_method-inl.h"
 #include "base/mutex.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
 #include "jit/profiling_info.h"
+#include "os.h"
 #include "safe_map.h"
 
 namespace art {
@@ -37,8 +40,17 @@
     return true;
   }
 
+  ScopedFlock flock;
+  std::string error;
+  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+    LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
+    return false;
+  }
+
+  int fd = flock.GetFile()->Fd();
+
   ProfileCompilationInfo info;
-  if (!info.Load(filename)) {
+  if (!info.Load(fd)) {
     LOG(WARNING) << "Could not load previous profile data from file " << filename;
     return false;
   }
@@ -54,9 +66,14 @@
     }
   }
 
+  if (!flock.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear profile file: " << filename;
+    return false;
+  }
+
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(filename);
+  bool result = info.Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
@@ -66,64 +83,20 @@
   return result;
 }
 
-enum OpenMode {
-  READ,
-  READ_WRITE
-};
-
-static int OpenFile(const std::string& filename, OpenMode open_mode) {
-  int fd = -1;
-  switch (open_mode) {
-    case READ:
-      fd = open(filename.c_str(), O_RDONLY);
-      break;
-    case READ_WRITE:
-      // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(), O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
-      break;
-  }
-
-  if (fd < 0) {
-    PLOG(WARNING) << "Failed to open profile file " << filename;
-    return -1;
-  }
-
-  // Lock the file for exclusive access but don't wait if we can't lock it.
-  int err = flock(fd, LOCK_EX | LOCK_NB);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to lock profile file " << filename;
-    return -1;
-  }
-  return fd;
-}
-
-static bool CloseDescriptorForFile(int fd, const std::string& filename) {
-  // Now unlock the file, allowing another process in.
-  int err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to unlock profile file " << filename;
-    return false;
-  }
-
-  // Done, close the file.
-  err = ::close(fd);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
-    return false;
-  }
-
-  return true;
-}
-
-static void WriteToFile(int fd, const std::ostringstream& os) {
+static bool WriteToFile(int fd, const std::ostringstream& os) {
   std::string data(os.str());
   const char *p = data.c_str();
   size_t length = data.length();
   do {
-    int n = ::write(fd, p, length);
+    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
+    if (n < 0) {
+      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+      return false;
+    }
     p += n;
     length -= n;
   } while (length > 0);
+  return true;
 }
 
 static constexpr const char kFieldSeparator = ',';
@@ -137,13 +110,8 @@
  *    /system/priv-app/app/app.apk,131232145,11,23,454,54
  *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool ProfileCompilationInfo::Save(const std::string& filename) {
-  int fd = OpenFile(filename, READ_WRITE);
-  if (fd == -1) {
-    return false;
-  }
-
-  // TODO(calin): Merge with a previous existing profile.
+bool ProfileCompilationInfo::Save(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
@@ -158,9 +126,7 @@
     os << kLineSeparator;
   }
 
-  WriteToFile(fd, os);
-
-  return CloseDescriptorForFile(fd, filename);
+  return WriteToFile(fd, os);
 }
 
 // TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
@@ -222,7 +188,9 @@
       LOG(WARNING) << "Cannot parse method_idx " << parts[i];
       return false;
     }
-    AddData(dex_location, checksum, method_idx);
+    if (!AddData(dex_location, checksum, method_idx)) {
+      return false;
+    }
   }
   return true;
 }
@@ -249,23 +217,18 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(const std::string& filename) {
-  int fd = OpenFile(filename, READ);
-  if (fd == -1) {
-    return false;
-  }
+bool ProfileCompilationInfo::Load(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
 
   std::string current_line;
   const int kBufferSize = 1024;
   char buffer[kBufferSize];
-  bool success = true;
 
-  while (success) {
-    int n = read(fd, buffer, kBufferSize);
+  while (true) {
+    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
     if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename;
-      success = false;
-      break;
+      PLOG(WARNING) << "Error when reading profile file";
+      return false;
     } else if (n == 0) {
       break;
     }
@@ -278,17 +241,13 @@
         break;
       }
       if (!ProcessLine(current_line)) {
-        success = false;
-        break;
+        return false;
       }
       // Reset the current line (we just processed it).
       current_line.clear();
     }
   }
-  if (!success) {
-    info_.clear();
-  }
-  return CloseDescriptorForFile(fd, filename) && success;
+  return true;
 }
 
 bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
@@ -369,4 +328,8 @@
   return os.str();
 }
 
+bool ProfileCompilationInfo::Equals(ProfileCompilationInfo& other) {
+  return info_.Equals(other.info_);
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 26e1ac3..ffd1433 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -39,15 +39,18 @@
  */
 class ProfileCompilationInfo {
  public:
+  // Saves profile information about the given methods in the given file.
+  // Note that the saving proceeds only if the file can be locked for exclusive access.
+  // If not (the locking is not blocking), the function does not save and returns false.
   static bool SaveProfilingInfo(const std::string& filename,
                                 const std::vector<ArtMethod*>& methods);
 
-  // Loads profile information from the given file.
-  bool Load(const std::string& profile_filename);
+  // Loads profile information from the given file descriptor.
+  bool Load(uint32_t fd);
   // Loads the data from another ProfileCompilationInfo object.
   bool Load(const ProfileCompilationInfo& info);
-  // Saves the profile data to the given file.
-  bool Save(const std::string& profile_filename);
+  // Saves the profile data to the given file descriptor.
+  bool Save(uint32_t fd);
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
 
@@ -61,6 +64,9 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
+  // For testing purposes.
+  bool Equals(ProfileCompilationInfo& other);
+
  private:
   bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool ProcessLine(const std::string& line);
@@ -69,10 +75,18 @@
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
     std::set<uint16_t> method_set;
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_set == other.method_set;
+    }
   };
 
   using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
 
+  friend class ProfileCompilationInfoTest;
+  friend class CompilerDriverProfileTest;
+  friend class ProfileAssistantTest;
+
   DexFileToProfileInfoMap info_;
 };
 
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
new file mode 100644
index 0000000..482ea06
--- /dev/null
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ protected:
+  std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
+                                            const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    std::vector<ArtMethod*> methods;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      methods.push_back(&m);
+    }
+    return methods;
+  }
+
+  bool AddData(const std::string& dex_location,
+               uint32_t checksum,
+               uint16_t method_index,
+               ProfileCompilationInfo* info) {
+    return info->AddData(dex_location, checksum, method_index);
+  }
+
+  uint32_t GetFd(const ScratchFile& file) {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
+  ScratchFile profile;
+
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Save virtual methods from Main.
+  std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), main_methods));
+
+  // Check that what we saved is in the profile.
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(info1.Load(GetFd(profile)));
+  ASSERT_EQ(info1.GetNumberOfMethods(), main_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+
+  // Save virtual methods from Second.
+  std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), second_methods));
+
+  // Check that what we saved is in the profile (methods from Main and Second).
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(info2.Load(GetFd(profile)));
+  ASSERT_EQ(info2.GetNumberOfMethods(), main_methods.size() + second_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+    for (ArtMethod* m : second_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveFd) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  // Save more methods.
+  for (uint16_t i = 0; i < 100; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back everything we saved.
+  ProfileCompilationInfo loaded_info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info2.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info2.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  // Trying to add info for an existing file but with a different checksum.
+  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  // Use the same file, change the checksum.
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+
+  ASSERT_FALSE(info1.Load(info2));
+}
+
+}  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index ec289ea..f3f5f95 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,16 +22,16 @@
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 500ms for now.
+// An arbitrary value to throttle save requests. Set to 2s for now.
 static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 500 * kMilisecondsToNano;
+static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
 
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kMaxBackoffMs = 4 * 60 * 1000;  // 4 minutes
-static constexpr const uint64_t kSavePeriodMs = 4 * 1000;  // 4 seconds
+static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
+static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
 static constexpr const double kBackoffCoef = 1.5;
 
 static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
@@ -86,12 +86,14 @@
 }
 
 bool ProfileSaver::ProcessProfilingInfo() {
-  VLOG(profiler) << "Initiating save profiling information to: " << output_filename_;
+  VLOG(profiler) << "Save profiling information to: " << output_filename_;
 
   uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
   if (last_update_time_ns - code_cache_last_update_time_ns_
-      > kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update.";
+      < kMinimumTimeBetweenCodeCacheUpdatesNs) {
+    VLOG(profiler) << "Not enough time has passed since the last code cache update."
+        << "Last update: " << last_update_time_ns
+        << " Last save: " << code_cache_last_update_time_ns_;
     return false;
   }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3571edb..18c52e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -583,6 +583,10 @@
   }
 }
 
+bool MemMap::Sync() {
+  return msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index ed21365..ebd550a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -126,6 +126,8 @@
     return name_;
   }
 
+  bool Sync();
+
   bool Protect(int prot);
 
   void MadviseDontNeedAndZero();
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index edcbcf2..81c855e 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -251,6 +251,10 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  // Some MIPS32 hardware (namely the Creator Ci20 development board)
+  // cannot allocate in the 2GB-4GB region.
+  TEST_DISABLED_FOR_MIPS();
+
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
@@ -271,8 +275,8 @@
         break;
       }
     }
-    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
     ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index e89c74d..0ddd4a2 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -16,6 +16,8 @@
 
 #include "java_lang_Class.h"
 
+#include <iostream>
+
 #include "art_field-inl.h"
 #include "class_linker.h"
 #include "common_throws.h"
@@ -303,7 +305,10 @@
       // We log the error for this specific case, as the user might just swallow the exception.
       // This helps diagnose crashes when applications rely on the String#value field being
       // there.
-      LOG(ERROR) << "The String#value field is not present on Android versions >= 6.0";
+      // Also print on the error stream to test it through run-test.
+      std::string message("The String#value field is not present on Android versions >= 6.0");
+      LOG(ERROR) << message;
+      std::cerr << message << std::endl;
     }
     // We may have a pending exception if we failed to resolve.
     if (!soa.Self()->IsExceptionPending()) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 13fd6a4..989e3f9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 83e594b..82b3933 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -46,6 +46,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
+#include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "vmap_table.h"
@@ -266,16 +267,15 @@
                                 i);
       return false;
     }
-
-    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
-    oat += dex_file_location_size;
-    if (UNLIKELY(oat > End())) {
+    if (UNLIKELY(static_cast<size_t>(End() - oat) < dex_file_location_size)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
                                     "location",
                                 GetLocation().c_str(),
                                 i);
       return false;
     }
+    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
+    oat += dex_file_location_size;
 
     std::string dex_file_location = ResolveRelativeEncodedDexLocation(
         abs_dex_location,
@@ -318,6 +318,17 @@
                                 Size());
       return false;
     }
+    if (UNLIKELY(Size() - dex_file_offset < sizeof(DexFile::Header))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u of %zu but the size of dex file header is %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size(),
+                                sizeof(DexFile::Header));
+      return false;
+    }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
@@ -339,34 +350,75 @@
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
+    if (Size() - dex_file_offset < header->file_size_) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u and size %u truncated at %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                header->file_size_,
+                                Size());
+      return false;
+    }
 
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table offset", GetLocation().c_str(), i,
+    uint32_t class_offsets_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after class offsets offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
-    uint32_t lookup_table_offset = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(lookup_table_offset);
-    if (Begin() + lookup_table_offset > End()) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table", GetLocation().c_str(), i,
+    if (UNLIKELY(class_offsets_offset > Size()) ||
+        UNLIKELY((Size() - class_offsets_offset) / sizeof(uint32_t) < header->class_defs_size_)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "class offsets, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
+    if (UNLIKELY(!IsAligned<alignof(uint32_t)>(class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned "
+                                    "class offsets, offset %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset);
+      return false;
+    }
+    const uint32_t* class_offsets_pointer =
+        reinterpret_cast<const uint32_t*>(Begin() + class_offsets_offset);
+
+    uint32_t lookup_table_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &lookup_table_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after lookup table offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
     const uint8_t* lookup_table_data = lookup_table_offset != 0u
         ? Begin() + lookup_table_offset
         : nullptr;
-
-    const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
-
-    oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
-    if (UNLIKELY(oat > End())) {
+    if (lookup_table_offset != 0u &&
+        (UNLIKELY(lookup_table_offset > Size()) ||
+            UNLIKELY(Size() - lookup_table_offset <
+                     TypeLookupTable::RawDataLength(header->class_defs_size_)))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
-                                    "method offsets",
+                                    "type lookup table, offset %u of %zu, class defs %u",
                                 GetLocation().c_str(),
                                 i,
-                                dex_file_location.c_str());
+                                dex_file_location.c_str(),
+                                lookup_table_offset,
+                                Size(),
+                                header->class_defs_size_);
       return false;
     }
 
@@ -398,7 +450,7 @@
                                               dex_file_checksum,
                                               dex_file_pointer,
                                               lookup_table_data,
-                                              methods_offsets_pointer,
+                                              class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
@@ -627,7 +679,7 @@
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
   }
 #endif
 }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2b92303..341be9a 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -309,8 +309,8 @@
     const std::string option(options[i].first);
       // TODO: support -Djava.class.path
     if (option == "bootclasspath") {
-      auto boot_class_path
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
+      auto boot_class_path = static_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+          const_cast<void*>(options[i].second));
 
       if (runtime_options != nullptr) {
         runtime_options->Set(M::BootClassPathDexList, boot_class_path);
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index a7f7bcd..eb226d4 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -34,7 +34,7 @@
     struct stat exec_st;
     std::string exec_path = tools_dir + tool;
     if (stat(exec_path.c_str(), &exec_st) != 0) {
-      ADD_FAILURE() << "Can not find " << tool << " in " << tools_dir;
+      ADD_FAILURE() << "Cannot find " << tool << " in " << tools_dir;
     }
   }
 }
@@ -42,7 +42,7 @@
 TEST_F(PrebuiltToolsTest, CheckHostTools) {
   std::string tools_dir = GetAndroidHostToolsDir();
   if (tools_dir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory for host";
+    ADD_FAILURE() << "Cannot find Android tools directory for host";
   } else {
     CheckToolsExist(tools_dir);
   }
@@ -54,7 +54,7 @@
   for (InstructionSet isa : isas) {
     std::string tools_dir = GetAndroidTargetToolsDir(isa);
     if (tools_dir.empty()) {
-      ADD_FAILURE() << "Can not find Android tools directory for " << isa;
+      ADD_FAILURE() << "Cannot find Android tools directory for " << isa;
     } else {
       CheckToolsExist(tools_dir);
     }
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 6cea902..ca456c2 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -39,6 +39,7 @@
   kIntrinsicFloatCvt,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicBitCount,
   kIntrinsicNumberOfLeadingZeros,
   kIntrinsicNumberOfTrailingZeros,
   kIntrinsicRotateRight,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index c4694ee..e30c26d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1131,10 +1131,14 @@
     }
 
     std::vector<std::unique_ptr<const DexFile>> boot_class_path;
-    OpenDexFiles(dex_filenames,
-                 dex_locations,
-                 runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+    if (runtime_options.Exists(Opt::BootClassPathDexList)) {
+      boot_class_path.swap(*runtime_options.GetOrDefault(Opt::BootClassPathDexList));
+    } else {
+      OpenDexFiles(dex_filenames,
+                   dex_locations,
+                   runtime_options.GetOrDefault(Opt::Image),
+                   &boot_class_path);
+    }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
     std::string error_msg;
     if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
@@ -1881,6 +1885,9 @@
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
                                      jit_options_->GetWarmupThreshold());
     jit_->CreateThreadPool();
+
+    // Notify native debugger about the classes already loaded before the creation of the jit.
+    jit_->DumpTypeInfoForLoadedTypes(GetClassLinker());
   } else {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index c54461e..e75481c 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -13,8 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "runtime_options.h"
 
+#include <memory>
+
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 5624285..c5b009d 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -117,8 +117,8 @@
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
-RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
-                                          BootClassPathDexList)  // TODO: make unique_ptr
+RUNTIME_OPTIONS_KEY (std::vector<std::unique_ptr<const DexFile>>*, \
+                                          BootClassPathDexList)
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
 RUNTIME_OPTIONS_KEY (CompilerCallbacks*,  CompilerCallbacksPtr)  // TDOO: make unique_ptr
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fc1a445..727ffe5 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -190,7 +190,7 @@
 class DumpCheckpoint FINAL : public Closure {
  public:
   explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
+      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(getpid())) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5815f7a..99b2296 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -815,10 +815,10 @@
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
-void Trace::BackwardBranch(Thread* /*thread*/, ArtMethod* method,
-                           int32_t /*dex_pc_offset*/)
+void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
+                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing " << PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
diff --git a/runtime/trace.h b/runtime/trace.h
index 356a81f..80f1a4c 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -164,7 +164,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
index 0d40bb7..fc9faec 100644
--- a/runtime/type_lookup_table.cc
+++ b/runtime/type_lookup_table.cc
@@ -16,6 +16,7 @@
 
 #include "type_lookup_table.h"
 
+#include "base/bit_utils.h"
 #include "dex_file-inl.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -42,25 +43,39 @@
 }
 
 uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
-  return RoundUpToPowerOfTwo(dex_file.NumClassDefs()) * sizeof(Entry);
+  return RawDataLength(dex_file.NumClassDefs());
 }
 
-TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file) {
+uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u;
+}
+
+uint32_t TypeLookupTable::CalculateMask(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) - 1u : 0u;
+}
+
+bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) {
+  return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
   const uint32_t num_class_defs = dex_file.NumClassDefs();
-  return (num_class_defs == 0 || num_class_defs > std::numeric_limits<uint16_t>::max())
-      ? nullptr
-      : new TypeLookupTable(dex_file);
+  return SupportedSize(num_class_defs)
+      ? new TypeLookupTable(dex_file, storage)
+      : nullptr;
 }
 
 TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
   return new TypeLookupTable(raw_data, dex_file);
 }
 
-TypeLookupTable::TypeLookupTable(const DexFile& dex_file)
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
-      entries_(new Entry[mask_ + 1]),
-      owns_entries_(true) {
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(storage != nullptr ? reinterpret_cast<Entry*>(storage) : new Entry[mask_ + 1]),
+      owns_entries_(storage == nullptr) {
+  static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned.");
+  DCHECK_ALIGNED(storage, alignof(Entry));
   std::vector<uint16_t> conflict_class_defs;
   // The first stage. Put elements on their initial positions. If an initial position is already
   // occupied then delay the insertion of the element to the second stage to reduce probing
@@ -93,7 +108,7 @@
 
 TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
       entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
       owns_entries_(false) {}
 
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3c2295c..d74d01d 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -60,7 +60,7 @@
   }
 
   // Method creates lookup table for dex file
-  static TypeLookupTable* Create(const DexFile& dex_file);
+  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
 
   // Method opens lookup table from binary data. Lookup table does not owns binary data.
   static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
@@ -76,6 +76,9 @@
   // Method returns length of binary data for the specified dex file.
   static uint32_t RawDataLength(const DexFile& dex_file);
 
+  // Method returns length of binary data for the specified number of class definitions.
+  static uint32_t RawDataLength(uint32_t num_class_defs);
+
  private:
    /**
     * To find element we need to compare strings.
@@ -109,8 +112,11 @@
     }
   };
 
+  static uint32_t CalculateMask(uint32_t num_class_defs);
+  static bool SupportedSize(uint32_t num_class_defs);
+
   // Construct from a dex file.
-  explicit TypeLookupTable(const DexFile& dex_file);
+  explicit TypeLookupTable(const DexFile& dex_file, uint8_t* storage);
 
   // Construct from a dex file with existing data.
   TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
diff --git a/runtime/type_lookup_table_test.cc b/runtime/type_lookup_table_test.cc
index 7f500cc..ea4d8b5 100644
--- a/runtime/type_lookup_table_test.cc
+++ b/runtime/type_lookup_table_test.cc
@@ -25,10 +25,10 @@
 
 namespace art {
 
-class TypeLookupTableTest : public CommonRuntimeTest {
- public:
-  size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
-};
+static const size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
+
+using DescriptorClassDefIdxPair = std::pair<const char*, uint32_t>;
+class TypeLookupTableTest : public CommonRuntimeTestWithParam<DescriptorClassDefIdxPair> {};
 
 TEST_F(TypeLookupTableTest, CreateLookupTable) {
   ScopedObjectAccess soa(Thread::Current());
@@ -39,48 +39,28 @@
   ASSERT_EQ(32U, table->RawDataLength());
 }
 
-TEST_F(TypeLookupTableTest, FindNonExistingClassWithoutCollisions) {
+TEST_P(TypeLookupTableTest, Find) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
   std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
   ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LBA;";
+  auto pair = GetParam();
+  const char* descriptor = pair.first;
   size_t hash = ComputeModifiedUtf8Hash(descriptor);
   uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(kDexNoIndex, class_def_idx);
+  ASSERT_EQ(pair.second, class_def_idx);
 }
 
-TEST_F(TypeLookupTableTest, FindNonExistingClassWithCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LDA;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(kDexNoIndex, class_def_idx);
-}
-
-TEST_F(TypeLookupTableTest, FindClassNoCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LC;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(2U, class_def_idx);
-}
-
-TEST_F(TypeLookupTableTest, FindClassWithCollisions) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
-  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
-  ASSERT_NE(nullptr, table.get());
-  const char* descriptor = "LAB;";
-  size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  uint32_t class_def_idx = table->Lookup(descriptor, hash);
-  ASSERT_EQ(1U, class_def_idx);
-}
-
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithoutCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LBA;", kDexNoIndex)));
+INSTANTIATE_TEST_CASE_P(FindNonExistingClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LDA;", kDexNoIndex)));
+INSTANTIATE_TEST_CASE_P(FindClassNoCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LC;", 2U)));
+INSTANTIATE_TEST_CASE_P(FindClassWithCollisions,
+                        TypeLookupTableTest,
+                        testing::Values(DescriptorClassDefIdxPair("LAB;", 1U)));
 }  // namespace art
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 5239e40..c67879b 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -353,7 +353,7 @@
     if (codePoint <= 0xffff) {
       if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
         // According to the Unicode standard, no character will ever
-        // be assigned to these code points, and they can not be encoded
+        // be assigned to these code points, and they cannot be encoded
         // into either utf-16 or utf-8.
         continue;
       }
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8e9f12b..07f94c0 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1115,7 +1115,7 @@
   BacktraceMap* map = existing_map;
   std::unique_ptr<BacktraceMap> tmp_map;
   if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(tid));
+    tmp_map.reset(BacktraceMap::Create(getpid()));
     map = tmp_map.get();
   }
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1c95648..7e0f337 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2735,7 +2735,8 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range, is_super);
+      MethodType type = is_super ? METHOD_SUPER : METHOD_VIRTUAL;
+      ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
@@ -2768,7 +2769,7 @@
     case Instruction::INVOKE_DIRECT:
     case Instruction::INVOKE_DIRECT_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range, false);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range);
       const char* return_type_descriptor;
       bool is_constructor;
       const RegType* return_type = nullptr;
@@ -2848,7 +2849,7 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE: {
         bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE);
-        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range, false);
+        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range);
         const char* descriptor;
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -2870,7 +2871,7 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE: {
       bool is_range =  (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range, false);
+      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range);
       if (abs_method != nullptr) {
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
@@ -3639,9 +3640,8 @@
   return *common_super;
 }
 
-// TODO Maybe I should just add a METHOD_SUPER to MethodType?
 ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(
-    uint32_t dex_method_idx, MethodType method_type, bool is_super) {
+    uint32_t dex_method_idx, MethodType method_type) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -3668,9 +3668,10 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
-    } else if (is_super && klass->IsInterface()) {
+    } else if (method_type == METHOD_SUPER && klass->IsInterface()) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
+      DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
@@ -3679,7 +3680,9 @@
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
       // a class has changed, and will be flagged as an error in later checks.
-      if (method_type == METHOD_INTERFACE || method_type == METHOD_VIRTUAL) {
+      if (method_type == METHOD_INTERFACE ||
+          method_type == METHOD_VIRTUAL ||
+          method_type == METHOD_SUPER) {
         res_method = klass->FindDirectMethod(name, signature, pointer_size);
       }
       if (res_method == nullptr) {
@@ -3742,7 +3745,7 @@
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
-  if (res_method->IsPrivate() && method_type == METHOD_VIRTUAL) {
+  if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
                                       << PrettyMethod(res_method);
     return nullptr;
@@ -3751,7 +3754,9 @@
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
-      ((method_type == METHOD_VIRTUAL || method_type == METHOD_INTERFACE) && res_method->IsDirect())
+      ((method_type == METHOD_SUPER ||
+        method_type == METHOD_VIRTUAL ||
+        method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
                                        " type of " << PrettyMethod(res_method);
@@ -3937,12 +3942,12 @@
 };
 
 ArtMethod* MethodVerifier::VerifyInvocationArgs(
-    const Instruction* inst, MethodType method_type, bool is_range, bool is_super) {
+    const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type, is_super);
+  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
     if (!have_pending_hard_failure_) {
@@ -3953,8 +3958,7 @@
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
   // has a vtable entry for the target method. Or the target is on a interface.
-  if (is_super) {
-    DCHECK(method_type == METHOD_VIRTUAL);
+  if (method_type == METHOD_SUPER) {
     if (res_method->GetDeclaringClass()->IsInterface()) {
       // TODO Fill in this part. Verify what we can...
       if (Runtime::Current()->IsAotCompiler()) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ec0a8f9..a26e0fb 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -57,7 +57,8 @@
   METHOD_UNKNOWN  = 0,
   METHOD_DIRECT,      // <init>, private
   METHOD_STATIC,      // static
-  METHOD_VIRTUAL,     // virtual, super
+  METHOD_VIRTUAL,     // virtual
+  METHOD_SUPER,       // super
   METHOD_INTERFACE    // interface
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
@@ -654,7 +655,7 @@
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type, bool is_super)
+  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -679,9 +680,7 @@
    * Returns the resolved method on success, null on failure (with *failure
    * set appropriately).
    */
-  ArtMethod* VerifyInvocationArgs(const Instruction* inst,
-                                          MethodType method_type,
-                                          bool is_range, bool is_super)
+  ArtMethod* VerifyInvocationArgs(const Instruction* inst, MethodType method_type, bool is_range)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Similar checks to the above, but on the proto. Will be used when the method cannot be
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 16cab03..0894f5d 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -667,13 +667,13 @@
     // float/long/double MERGE float/long/double_constant => float/long/double
     return SelectNonConstant(*this, incoming_type);
   } else if (IsReferenceTypes() && incoming_type.IsReferenceTypes()) {
-    if (IsZero() || incoming_type.IsZero()) {
-      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
-    } else if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
+    if (IsUninitializedTypes() || incoming_type.IsUninitializedTypes()) {
       // Something that is uninitialized hasn't had its constructor called. Unitialized types are
       // special. They may only ever be merged with themselves (must be taken care of by the
       // caller of Merge(), see the DCHECK on entry). So mark any other merge as conflicting here.
       return conflict;
+    } else if (IsZero() || incoming_type.IsZero()) {
+      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
     } else if (IsJavaLangObject() || incoming_type.IsJavaLangObject()) {
       return reg_types->JavaLangObject(false);  // Object MERGE ref => Object
     } else if (IsUnresolvedTypes() || incoming_type.IsUnresolvedTypes()) {
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 9daaf8e..d96fb42 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -133,4 +133,8 @@
   return new ZipEntry(handle_, zip_entry.release());
 }
 
+ZipArchive::~ZipArchive() {
+  CloseArchive(handle_);
+}
+
 }  // namespace art
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 717eb8c..42bf55c 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -63,9 +63,7 @@
 
   ZipEntry* Find(const char* name, std::string* error_msg) const;
 
-  ~ZipArchive() {
-    CloseArchive(handle_);
-  }
+  ~ZipArchive();
 
  private:
   explicit ZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
diff --git a/test/048-reflect-v8/build b/test/048-reflect-v8/build
new file mode 100644
index 0000000..4ea1838
--- /dev/null
+++ b/test/048-reflect-v8/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+export JACK_SERVER=false
+export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks"
+# Derive the version from the ALPHA jar's file name, e.g. /foo/bar/jack-3.10.ALPHA.jar -> 3.10.ALPHA
+export JACK_VERSION="$(find "$JACK_REPOSITORY" -name '*ALPHA*' | sed 's/.*jack-//g' | sed 's/[.]jar//g')"
+
+./default-build "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
new file mode 100644
index 0000000..2d0b4cc
--- /dev/null
+++ b/test/048-reflect-v8/expected.txt
@@ -0,0 +1,4 @@
+Main$DefaultInterface is default = yes
+Main$RegularInterface is default = no
+Main$ImplementsWithDefault is default = yes
+Main$ImplementsWithRegular is default = no
diff --git a/test/048-reflect-v8/info.txt b/test/048-reflect-v8/info.txt
new file mode 100644
index 0000000..a336d30
--- /dev/null
+++ b/test/048-reflect-v8/info.txt
@@ -0,0 +1 @@
+Tests reflection on Java 1.8 APIs: Method.isDefault() for default and regular interface methods.
diff --git a/test/048-reflect-v8/run b/test/048-reflect-v8/run
new file mode 100644
index 0000000..ba3318a
--- /dev/null
+++ b/test/048-reflect-v8/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Ensure that the default methods are turned on for dalvikvm and dex2oat
+${RUN} "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
new file mode 100644
index 0000000..7fa2a92
--- /dev/null
+++ b/test/048-reflect-v8/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {  // Prints Method.isDefault() for sayHi() looked up on each type below.
+  interface DefaultInterface {
+    default void sayHi() {  // Default method; expected.txt expects "is default = yes".
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();  // Abstract method; expected.txt expects "is default = no".
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}  // Inherits the default sayHi().
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {  // Implemented by the class itself, hence "is default = no".
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {  // Prints "<name> is default = yes|no".
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);  // Lookup failed: print the throwable instead of a yes/no line.
+      return;
+    }
+
+    boolean isDefault = m.isDefault();  // Kept outside the try: failures here propagate.
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void main(String[] args) {
+    printIsDefault(DefaultInterface.class);  // Call order must match the order in expected.txt.
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
diff --git a/test/127-secondarydex/build b/test/127-checker-secondarydex/build
similarity index 100%
rename from test/127-secondarydex/build
rename to test/127-checker-secondarydex/build
diff --git a/test/127-secondarydex/expected.txt b/test/127-checker-secondarydex/expected.txt
similarity index 100%
rename from test/127-secondarydex/expected.txt
rename to test/127-checker-secondarydex/expected.txt
diff --git a/test/127-secondarydex/info.txt b/test/127-checker-secondarydex/info.txt
similarity index 100%
rename from test/127-secondarydex/info.txt
rename to test/127-checker-secondarydex/info.txt
diff --git a/test/127-secondarydex/run b/test/127-checker-secondarydex/run
similarity index 100%
rename from test/127-secondarydex/run
rename to test/127-checker-secondarydex/run
diff --git a/test/127-secondarydex/src/Main.java b/test/127-checker-secondarydex/src/Main.java
similarity index 100%
rename from test/127-secondarydex/src/Main.java
rename to test/127-checker-secondarydex/src/Main.java
diff --git a/test/127-secondarydex/src/Super.java b/test/127-checker-secondarydex/src/Super.java
similarity index 100%
rename from test/127-secondarydex/src/Super.java
rename to test/127-checker-secondarydex/src/Super.java
diff --git a/test/127-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
similarity index 78%
rename from test/127-secondarydex/src/Test.java
rename to test/127-checker-secondarydex/src/Test.java
index 8547e79..266ed19 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -23,6 +23,13 @@
         System.out.println("Test");
     }
 
+    /// CHECK-START: java.lang.Integer Test.toInteger() ssa_builder (after)
+    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
+
+    public Integer toInteger() {
+        return new Integer(42);
+    }
+
     public String toString() {
         return new String("Test");
     }
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 9bfe429..77301d2 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -76,7 +76,7 @@
     }
   }
 
-  printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str());
+  printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
       printf("  %s\n", it->func_name.c_str());
@@ -112,7 +112,7 @@
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind in process.\n");
+    printf("Cannot unwind in process.\n");
     return JNI_FALSE;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind in process.\n");
@@ -205,7 +205,7 @@
   std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD));
   bool result = true;
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind other process.\n");
+    printf("Cannot unwind other process.\n");
     result = false;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind of other process.\n");
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 0640b36..bcb697a 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -79,7 +79,7 @@
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClass(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         WeakReference<Class> klass2 = setUpUnloadClass(constructor);
         Runtime.getRuntime().gc();
@@ -91,7 +91,7 @@
     private static void testUnloadLoader(Constructor constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
-      // No strong refernces to class loader, should get unloaded.
+      // No strong references to class loader, should get unloaded.
       Runtime.getRuntime().gc();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
@@ -109,7 +109,7 @@
 
     private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
diff --git a/test/143-string-value/check b/test/143-string-value/check
index cdf7b78..92f6e90 100755
--- a/test/143-string-value/check
+++ b/test/143-string-value/check
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Strip run-specific numbers (pid and line number)
-sed -e 's/^art E[ ]\+[0-9]\+[ ]\+[0-9]\+ art\/runtime\/native\/java_lang_Class.cc:[0-9]\+\] //' "$2" > "$2.tmp"
+# Strip error log messages.
+sed -e '/^art E.*\] /d' "$2" > "$2.tmp"
 
 diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 3e6d1f4..06cfd0a 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -622,28 +622,39 @@
   static int[][] mA;
 
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: InvokeStaticOrDirect
-  /// CHECK-DAG: ArraySet
-
+  //  Array references mA[i] and mA[i][j] are both in the inner loop.
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Bounds1:i\d+>>] loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG:  <<Array1>>     NullCheck [<<Field1:l\d+>>]                 loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len1:i\d+>>  ArrayLength [<<Array1>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds1>>    BoundsCheck [<<Index1:i\d+>>,<<Len1>>]      loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Bounds2:i\d+>>] loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
+  //  Array reference mA[i] hoisted to same level as deopt.
+  /// CHECK-DAG:                 Deoptimize                                  loop:<<OuterLoop:B\d+>>
+  /// CHECK-DAG:                 ArrayLength                                 loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
+  //  Array reference mA[i][j] is still in the inner loop, with a direct index.
+  /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
+  /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
+  //  Synthetic phi.
+  /// CHECK-DAG:  <<Array2>>     Phi                                         loop:<<OuterLoop>>
+  /// CHECK-DAG:  <<Array1>>     StaticFieldGet                              loop:none
+  /// CHECK-EVAL: "<<InnerLoop>>" != "<<OuterLoop>>"
+  //
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
   /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-NOT: ArrayGet
-  /// CHECK-DAG: InvokeStaticOrDirect
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-
   static void dynamicBCEAndIntrinsic(int n) {
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < n; j++) {
diff --git a/test/495-checker-checkcast-tests/src/Main.java b/test/495-checker-checkcast-tests/src/Main.java
index 4b2bf09..6011c7c 100644
--- a/test/495-checker-checkcast-tests/src/Main.java
+++ b/test/495-checker-checkcast-tests/src/Main.java
@@ -113,13 +113,13 @@
   }
 
   /// CHECK-START: java.lang.String Main.knownTestWithLoadedClass() register (after)
-  /// CHECK-NOT: LoadClass
+  /// CHECK-NOT: CheckCast
   public static String knownTestWithLoadedClass() {
     return (String)$inline$getString();
   }
 
   /// CHECK-START: Itf Main.knownTestWithUnloadedClass() register (after)
-  /// CHECK: LoadClass
+  /// CHECK: CheckCast
   public static Itf knownTestWithUnloadedClass() {
     return (Itf)$inline$getString();
   }
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index f1d9a37..5561055 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -26,7 +26,7 @@
   //
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -40,7 +40,7 @@
   }
 
   /// CHECK-START: int Main.linearDown(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -54,7 +54,7 @@
   }
 
   /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -69,7 +69,7 @@
   }
 
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -84,7 +84,7 @@
   }
 
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -101,7 +101,7 @@
   }
 
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -116,7 +116,7 @@
   }
 
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -134,7 +134,7 @@
   }
 
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -156,7 +156,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -173,7 +173,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -194,7 +194,7 @@
   }
 
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -208,7 +208,7 @@
   }
 
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -223,8 +223,8 @@
   }
 
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -241,7 +241,7 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -255,10 +255,12 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearByTwoSkip2(int x[]) {
     int result = 0;
@@ -270,7 +272,7 @@
   }
 
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -287,7 +289,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -305,10 +307,12 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -323,7 +327,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -341,10 +345,12 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -359,7 +365,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -374,7 +380,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -389,7 +395,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -405,7 +411,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -421,10 +427,12 @@
   }
 
   /// CHECK-START: int Main.linearShort() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.linearShort() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -437,7 +445,7 @@
   }
 
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -458,20 +466,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnTwoArrayLengths(int n) {
     int[] a = new int[n];
@@ -488,20 +487,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnOneArrayLength(int n) {
     int[] a = new int[n];
@@ -518,20 +508,11 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnParameter(int n) {
     int[] a = new int[n];
@@ -548,32 +529,13 @@
   }
 
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularVariations(int n) {
     int[] a = new int[n];
@@ -616,22 +578,11 @@
   }
 
   /// CHECK-START: void Main.bubble(int[]) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void bubble(int[] a) {
     for (int i = a.length; --i >= 0;) {
@@ -646,7 +597,7 @@
   }
 
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -664,7 +615,7 @@
   }
 
   /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -685,10 +636,10 @@
   }
 
   /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -713,7 +664,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp1() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -728,7 +679,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp2() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -743,7 +694,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp3() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -758,10 +709,12 @@
   }
 
   /// CHECK-START: int Main.justOOBUp() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBUp() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -774,7 +727,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown1() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -789,7 +742,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown2() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -804,7 +757,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown3() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -819,10 +772,12 @@
   }
 
   /// CHECK-START: int Main.justOOBDown() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBDown() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -835,66 +790,74 @@
   }
 
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
+    // OOB!
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
     }
   }
 
   /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
+    // OOB!
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
     }
   }
 
   /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
+    // OOB!
     do {
       sResult += x[i++];
     } while (i <= x.length);
   }
 
   /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
+    // OOB!
     do {
       sResult += x[i--];
     } while (-1 <= i);
   }
 
   /// CHECK-START: int[] Main.multiply1() BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.multiply1() BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static int[] multiply1() {
     int[] a = new int[10];
@@ -912,21 +875,20 @@
   }
 
   /// CHECK-START: int[] Main.multiply2() BCE (before)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
   //
   /// CHECK-START: int[] Main.multiply2() BCE (after)
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-NOT: Deoptimize
   static int[] multiply2() {
     int[] a = new int[10];
     try {
       for (int i = -3; i <= 3; i++) {
         for (int j = -3; j <= 3; j++) {
           // Range [-9,9]: unsafe.
-         a[i * j] += 1;
+          a[i * j] += 1;
         }
       }
     } catch (Exception e) {
@@ -936,24 +898,19 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
-  /// CHECK: StaticFieldGet
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
-  /// CHECK: StaticFieldGet
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int linearDynamicBCE1(int[] x, int lo, int hi) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -963,24 +920,19 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
-  /// CHECK: StaticFieldGet
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
-  /// CHECK: StaticFieldGet
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: StaticFieldSet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -990,19 +942,19 @@
   }
 
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int wrapAroundDynamicBCE(int[] x) {
     int w = 9;
     int result = 0;
@@ -1014,19 +966,19 @@
   }
 
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   private static int periodicDynamicBCE(int[] x) {
     int k = 0;
     int result = 0;
@@ -1038,20 +990,19 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
-  /// CHECK-NOT: NullCheck
-  /// CHECK-NOT: ArrayLength
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK-DAG: ArrayGet    loop:{{B\d+}}
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
   static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
     // This loop could be infinite for hi = max int. Since i is also used
     // as subscript, however, dynamic bce can proceed.
@@ -1063,16 +1014,14 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
     // As above, but now the index is not used as subscript,
@@ -1085,16 +1034,14 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
     int result = 0;
@@ -1107,42 +1054,21 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: NotEqual
-  /// CHECK: If
-  /// CHECK: If
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop:B\d+>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>>
   //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
-  /// CHECK: NullCheck
-  /// CHECK: ArrayLength
-  /// CHECK: NotEqual
-  /// CHECK: If
-  /// CHECK: If
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: If
-  /// CHECK: Deoptimize
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK: BoundsCheck
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK: Exit
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: ArrayGet
+  //  Order matters:
+  /// CHECK:              Deoptimize loop:<<Loop:B\d+>>
+  //  CHECK-NOT:          Goto       loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
+  /// CHECK:              Goto       loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
   static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
     // Deliberately test array length on a before the loop so that only bounds checks
     // on constant subscripts remain, making them a viable candidate for hoisting.
@@ -1166,80 +1092,74 @@
     return result;
   }
 
-  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
-  /// CHECK: If
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
-  /// CHECK: BoundsCheck
-  /// CHECK: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  //  For brevity, just test occurrence of at least one of each in the loop:
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
   //
-  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
-  /// CHECK-DAG: If
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-NOT: BoundsCheck
-  /// CHECK-NOT: ArrayGet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: ArrayGet
-  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
-                                                  boolean[] r,
-                                                  byte[] s,
-                                                  char[] t,
-                                                  short[] u,
-                                                  int[] v,
-                                                  long[] w,
-                                                  float[] x,
-                                                  double[] y,
-                                                  Integer[] z, int lo, int hi) {
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-NOT: ArrayGet    loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck   loop:{{B\d+}}
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllPrimTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize  loop:none
+  static int dynamicBCEAndConstantIndicesAllPrimTypes(int[] q,
+                                                      boolean[] r,
+                                                      byte[] s,
+                                                      char[] t,
+                                                      short[] u,
+                                                      int[] v,
+                                                      long[] w,
+                                                      float[] x,
+                                                      double[] y, int lo, int hi) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
+      // All constant index array references can be hoisted out of the loop during BCE on q[i].
       result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
-                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+                                        (int) w[0] + (int) x[0] + (int) y[0];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: NullCheck   loop:<<Loop>>
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: ArrayGet    loop:<<Loop:B\d+>>
+  /// CHECK-DAG: ArrayGet    loop:<<Loop>>
+  /// CHECK-DAG: Deoptimize  loop:none
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndexRefType(int[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-NOT: ArrayLength loop:{{B\d+}}
+  /// CHECK-NOT: BoundsCheck loop:{{B\d+}}
+  static int dynamicBCEAndConstantIndexRefType(int[] q, Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      // Similar to above, but now implicit virtual call to intValue()
+      // prevents hoisting z[0] array get itself during BCE on q[i].
+      result += q[i] + z[0];
     }
     return result;
   }
@@ -1501,9 +1421,10 @@
     long[] x6 = { 6 };
     float[] x7 = { 7 };
     double[] x8 = { 8 };
+    expectEquals(415,
+        dynamicBCEAndConstantIndicesAllPrimTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, 0, 10));
     Integer[] x9 = { 9 };
-    expectEquals(505,
-        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
+    expectEquals(145, dynamicBCEAndConstantIndexRefType(x, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 2d0688d..549ed99 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -47,7 +47,7 @@
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Left:i\d+>>        ParameterValue
   /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
@@ -57,6 +57,28 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            mla r{{\d+}}, r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static int $opt$noinline$mulAdd(int acc, int left, int right) {
     if (doThrow) throw new Error();
     return acc + left * right;
@@ -78,7 +100,7 @@
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Left:j\d+>>        ParameterValue
   /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
@@ -88,6 +110,28 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
   public static long $opt$noinline$mulSub(long acc, long left, long right) {
     if (doThrow) throw new Error();
     return acc - left * right;
@@ -117,7 +161,28 @@
   /// CHECK:                            Return [<<Or>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
 
   public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
     if (doThrow) throw new Error();
@@ -151,7 +216,30 @@
   /// CHECK:                            Return [<<Res>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
 
 
   public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
@@ -176,7 +264,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Var:i\d+>>         ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
@@ -186,6 +274,27 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            mla r{{\d+}}, r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static int $opt$noinline$mulPlusOne(int acc, int var) {
     if (doThrow) throw new Error();
     return acc * (var + 1);
@@ -207,7 +316,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Var:j\d+>>         ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
@@ -217,11 +326,123 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
   public static long $opt$noinline$mulMinusOne(long acc, long var) {
     if (doThrow) throw new Error();
     return acc * (1 - var);
   }
 
+  /**
+   * Test merging of int `MUL+NEG` into `MULNEG` (ARM64 merges; the ARM checks expect no merge).
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulNeg:i\d+>>      MultiplyAccumulate [{{i\d+}},<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) disassembly (after)
+  /// CHECK:                            mneg w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:i\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static int $opt$noinline$mulNeg(int left, int right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
+
+  /**
+   * Test merging of long `MUL+NEG` into `MULNEG` (ARM64 merges; the ARM checks expect no merge).
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [{{j\d+}},<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) disassembly (after)
+  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$mulNeg(long left, long right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
 
   public static void main(String[] args) {
     assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
@@ -230,5 +451,7 @@
     assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
     assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
     assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+    assertIntEquals(-306, $opt$noinline$mulNeg(17, 18));
+    assertLongEquals(-380, $opt$noinline$mulNeg(19L, 20L));  // 'L' selects the long overload.
   }
 }
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 30a648d..971ad84 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -91,9 +91,7 @@
    goto :other_loop_entry
 .end method
 
-# Check that if a irreducible loop entry is dead, the loop can become
-# natural.
-# We start with:
+# Check that dce does not apply for irreducible loops.
 #
 #        entry
 #       /    \
@@ -106,18 +104,8 @@
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
 ## CHECK: irreducible:true
 
-# And end with:
-#
-#        entry
-#       /
-#      /
-# loop_entry
-#    /    \-
-#  exit    \-
-#           other_loop_entry
-
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
-## CHECK-NOT: irreducible:true
+## CHECK: irreducible:true
 .method public static dce(I)I
    .registers 3
    const/16 v0, 42
diff --git a/test/563-checker-fakestring/expected.txt b/test/563-checker-fakestring/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/563-checker-fakestring/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/563-checker-fakestring/info.txt b/test/563-checker-fakestring/info.txt
new file mode 100644
index 0000000..ef09d8c
--- /dev/null
+++ b/test/563-checker-fakestring/info.txt
@@ -0,0 +1,2 @@
+Regression test for FakeString simplification which incorrectly assumed that
+it cannot be used before a call to StringFactory.
\ No newline at end of file
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
new file mode 100644
index 0000000..54312a4
--- /dev/null
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -0,0 +1,182 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test that all vregs holding the new-instance are updated after the
+# StringFactory call.
+
+## CHECK-START: java.lang.String TestCase.vregAliasing(byte[]) register (after)
+## CHECK-DAG:                Return [<<String:l\d+>>]
+## CHECK-DAG:     <<String>> InvokeStaticOrDirect  method_name:java.lang.String.<init>
+
+.method public static vregAliasing([B)Ljava/lang/String;
+   .registers 5
+
+   # Create new instance of String and store it to v0, v1, v2 (three aliases of one reference).
+   new-instance v0, Ljava/lang/String;
+   move-object v1, v0
+   move-object v2, v0
+
+   # Call String.<init> on v1 only; v0 and v2 do not appear in the invoke.
+   const-string v3, "UTF8"
+   invoke-direct {v1, p0, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # Return the object from v2, an alias that was never passed to <init>.
+   return-object v2
+
+.end method
+
+# Test usage of String new-instance before it is initialized.
+
+## CHECK-START: void TestCase.compareNewInstance() register (after)
+## CHECK-DAG:     <<Null:l\d+>>   NullConstant
+## CHECK-DAG:     <<String:l\d+>> NewInstance
+## CHECK-DAG:     <<Cond:z\d+>>   NotEqual [<<String>>,<<Null>>]
+## CHECK-DAG:                     If [<<Cond>>]
+
+.method public static compareNewInstance()V
+   .registers 3
+   # The null check below reads v0 before String.<init> has been called on it.
+   new-instance v0, Ljava/lang/String;
+   if-nez v0, :return
+
+   # Will throw NullPointerException if this branch is taken (v1 passes null as the byte[]).
+   const v1, 0x0
+   const-string v2, "UTF8"
+   invoke-direct {v0, v1, v2}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-void
+
+   :return
+   return-void
+
+.end method
+
+# Test deoptimization between String's allocation and initialization. When not
+# compiling --debuggable, the NewInstance will be optimized out.
+
+## CHECK-START: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+## CHECK-START-DEBUGGABLE: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<String:l\d+>> NewInstance
+## CHECK:                         Deoptimize env:[[<<String>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+.method public static deoptimizeNewInstance([I[B)I
+   .registers 6
+   # v2 accumulates the loaded elements; v1 is the array index and then the loaded value.
+   const v2, 0x0
+   const v1, 0x1
+
+   new-instance v0, Ljava/lang/String;
+
+   # Deoptimize here if the array is too short. v0 is allocated but not yet initialized.
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   # Check that we're being executed by the interpreter.
+   invoke-static {}, LMain;->assertIsInterpreted()V
+
+   # String allocation should succeed: v0 was created before the deoptimization point.
+   const-string v3, "UTF8"
+   invoke-direct {v0, p1, v3}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+
+   # This ArrayGet will throw ArrayIndexOutOfBoundsException (Main passes a 3-element array).
+   const v1, 0x4
+   aget v1, p0, v1
+   add-int/2addr v2, v1
+
+   return v2
+
+.end method
+
+# Test that a redundant NewInstance is removed if not used and not compiling
+# --debuggable.
+
+## CHECK-START: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK-NOT:     NewInstance
+## CHECK-NOT:     LoadClass
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK:         NewInstance
+
+.method public static removeNewInstance([B)Ljava/lang/String;
+   .registers 5
+   # v0 is not read between new-instance and String.<init>; it is only returned afterwards.
+   new-instance v0, Ljava/lang/String;
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+# Test that the compiler does not assume that the first argument of String.<init>
+# is a NewInstance by inserting an irreducible loop between them (b/26676472).
+
+# We verify the type of the input instruction (Phi) in debuggable mode, because
+# it is eliminated by later stages of SsaBuilder otherwise.
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance1(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance1([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop: entered by fall-through at :loop_header and by branch at :loop_entry.
+   if-eqz p1, :loop_entry
+   :loop_header
+   const v1, 0x1
+   xor-int p1, p1, v1
+   :loop_entry
+   if-eqz p1, :string_init
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance2(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance2([BZ)Ljava/lang/String;
+   .registers 5
+   # Variant of thisNotNewInstance1 with the exit test at :loop_header instead of :loop_entry.
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop: entered by fall-through at :loop_header and by branch at :loop_entry.
+   if-eqz p1, :loop_entry
+   :loop_header
+   if-eqz p1, :string_init
+   :loop_entry
+   const v1, 0x1
+   xor-int p1, p1, v1
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
new file mode 100644
index 0000000..1ac8a5b
--- /dev/null
+++ b/test/563-checker-fakestring/src/Main.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static native void assertIsInterpreted();  // Called from TestCase.smali.
+
+  // Throws an Error unless `actual` equals `expected`.
+  private static void assertEqual(String expected, String actual) {
+    if (!expected.equals(actual)) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  // Runs every TestCase method through reflection; args[0] is passed to System.loadLibrary.
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    String testString = "Hello world";
+    byte[] testData = testString.getBytes("UTF8");
+
+    {
+      Method m = c.getMethod("vregAliasing", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    c.getMethod("compareNewInstance").invoke(null, (Object[]) null);
+
+    {
+      Method m = c.getMethod("deoptimizeNewInstance", int[].class, byte[].class);
+      try {
+        m.invoke(null, new Object[] { new int[] { 1, 2, 3 }, testData });
+        throw new Error("Expected ArrayIndexOutOfBoundsException");
+      } catch (InvocationTargetException ex) {
+        // The smali method reads index 4 of this 3-element array; rethrow anything else.
+        if (!(ex.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+          throw ex.getCause();
+        }
+      }
+    }
+
+    {
+      Method m = c.getMethod("removeNewInstance", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      Method m = c.getMethod("thisNotNewInstance1", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+    {
+      Method m = c.getMethod("thisNotNewInstance2", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+  }
+}
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit bit count operation.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..b250145
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Checks Integer.bitCount and Long.bitCount against hand-computed population counts.
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits32(int x) {
+    return Integer.bitCount(x);
+  }
+
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits64(long x) {
+    return Long.bitCount(x);
+  }
+
+  public static void main(String[] args) {
+    expectEquals32(0, bits32(0x00000000));
+    expectEquals32(1, bits32(0x00000001));
+    expectEquals32(1, bits32(0x10000000));
+    expectEquals32(2, bits32(0x10000001));
+    expectEquals32(2, bits32(0x00000003));
+    expectEquals32(3, bits32(0x70000000));
+    expectEquals32(4, bits32(0x000F0000));
+    expectEquals32(4, bits32(0x00001111));
+    expectEquals32(4, bits32(0x11110000));
+    expectEquals32(8, bits32(0x11111111));
+    expectEquals32(13, bits32(0x12345678));
+    expectEquals32(19, bits32(0x9ABCDEF0));
+    expectEquals32(32, bits32(0xFFFFFFFF));
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(1, bits32(1 << i));
+    }
+
+    expectEquals64(0, bits64(0x0000000000000000L));
+    expectEquals64(1, bits64(0x0000000000000001L));
+    expectEquals64(1, bits64(0x1000000000000000L));
+    expectEquals64(2, bits64(0x1000000000000001L));
+    expectEquals64(2, bits64(0x0000000000000003L));
+    expectEquals64(3, bits64(0x7000000000000000L));
+    expectEquals64(4, bits64(0x000F000000000000L));
+    expectEquals64(8, bits64(0x0000000011111111L));
+    expectEquals64(8, bits64(0x1111111100000000L));
+    expectEquals64(16, bits64(0x1111111111111111L));
+    expectEquals64(33, bits64(0x123456789ABCDEF1L));
+    expectEquals64(64, bits64(0xFFFFFFFFFFFFFFFFL));
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(1, bits64(1L << i));
+    }
+
+    System.out.println("passed");
+  }
+  // The helpers below throw an Error when `result` differs from `expected`.
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 27f5b5d..2e66af5 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -49,4 +49,5 @@
 b/25494456
 b/21869691
 b/26143249
+b/26579108
 Done!
diff --git a/test/800-smali/smali/b_26579108.smali b/test/800-smali/smali/b_26579108.smali
new file mode 100644
index 0000000..dde3825
--- /dev/null
+++ b/test/800-smali/smali/b_26579108.smali
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB26579108;
+.super Ljava/lang/Object;
+
+# Ensure that merging uninitialized type and null does not pass verification.
+
+.field public static field:I
+
+.method public static run()Ljava/lang/String;
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+    # v0 now holds an uninitialized String reference.
+    sget v1, LB26579108;->field:I
+    if-eqz v1, :cond_5
+    # Fall-through path only: replace v0 with null.
+    const/4 v0, 0x0
+    :cond_5
+    # Merge point: v0 is either the uninitialized String or null, which must not verify.
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    return-object v0
+  .end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index cc3b0b4..38aa58d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -143,6 +143,8 @@
                 new IncompatibleClassChangeError(), null));
         testCases.add(new TestCase("b/26143249", "B26143249", "run", null,
                 new AbstractMethodError(), null));
+        testCases.add(new TestCase("b/26579108", "B26579108", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {
@@ -188,8 +190,7 @@
                 if (tc.expectedException != null) {
                     errorReturn = new IllegalStateException("Expected an exception in test " +
                                                             tc.testName);
-                }
-                if (tc.expectedReturn == null && retValue != null) {
+                } else if (tc.expectedReturn == null && retValue != null) {
                     errorReturn = new IllegalStateException("Expected a null result in test " +
                                                             tc.testName);
                 } else if (tc.expectedReturn != null &&
diff --git a/test/971-iface-super-partial-compile-generated/build b/test/971-iface-super/build
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/build
rename to test/971-iface-super/build
diff --git a/test/971-iface-super-partial-compile-generated/expected.txt b/test/971-iface-super/expected.txt
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/expected.txt
rename to test/971-iface-super/expected.txt
diff --git a/test/971-iface-super-partial-compile-generated/info.txt b/test/971-iface-super/info.txt
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/info.txt
rename to test/971-iface-super/info.txt
diff --git a/test/971-iface-super-partial-compile-generated/run b/test/971-iface-super/run
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/run
rename to test/971-iface-super/run
diff --git a/test/971-iface-super-partial-compile-generated/util-src/generate_java.py b/test/971-iface-super/util-src/generate_java.py
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/util-src/generate_java.py
rename to test/971-iface-super/util-src/generate_java.py
diff --git a/test/971-iface-super-partial-compile-generated/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
similarity index 100%
rename from test/971-iface-super-partial-compile-generated/util-src/generate_smali.py
rename to test/971-iface-super/util-src/generate_smali.py
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 17023a3..36dd9f4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -70,6 +70,7 @@
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
 	  JILL_JAR=$(abspath $(JILL_JAR)) \
@@ -242,7 +243,7 @@
   968-default-partial-compile-generated \
   969-iface-super \
   970-iface-super-resolution-generated \
-  971-iface-super-partial-compile-generated
+  971-iface-super
 
 # Check if we have python3 to run our tests.
 ifeq ($(wildcard /usr/bin/python3),)
@@ -270,16 +271,6 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
-# 143-string-value tests for a LOG(E) tag, which is only supported on host.
-TEST_ART_BROKEN_TARGET_RUN_TESTS := \
-  143-string-value \
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TARGET_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-
-TEST_ART_BROKEN_TARGET_RUN_TESTS :=
-
 # 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
   117-nopatchoat \
@@ -466,7 +457,8 @@
 
 # Known broken tests for the default compiler (Quick).
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  457-regs
+  457-regs \
+  563-checker-fakestring
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -545,10 +537,13 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler.
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
+# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+#      instruction yet (b/26601270).
 # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
 #      not yet handled in the read barrier configuration.
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
   484-checker-register-hints \
+  527-checker-array-access-split \
   537-checker-arraycopy
 
 # Tests that should fail in the read barrier configuration with JIT.
@@ -973,6 +968,7 @@
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
 	    JILL_JAR=$(abspath $(JILL_JAR)) \
diff --git a/test/127-secondarydex/src/Test.java b/test/ProfileTestMultiDex/Main.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/ProfileTestMultiDex/Main.java
index 8547e79..41532ea 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/ProfileTestMultiDex/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
+class Main {
+  public String getA() {
+    return "A";
+  }
+  public String getB() {
+    return "B";
+  }
+  public String getC() {
+    return "C";
+  }
 }
diff --git a/test/127-secondarydex/src/Test.java b/test/ProfileTestMultiDex/Second.java
similarity index 66%
copy from test/127-secondarydex/src/Test.java
copy to test/ProfileTestMultiDex/Second.java
index 8547e79..4ac5abc 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/ProfileTestMultiDex/Second.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
  * limitations under the License.
  */
 
-public class Test extends Super {
-    public void test(Test t) {
-        t.print();
-    }
-
-    private void print() {
-        System.out.println("Test");
-    }
-
-    public String toString() {
-        return new String("Test");
-    }
+class Second {
+  public String getX() {
+    return "X";
+  }
+  public String getY() {
+    return "Y";
+  }
+  public String getZ() {
+    return "Z";
+  }
 }
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
new file mode 100644
index 0000000..f2e3b4e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -0,0 +1,3 @@
+main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Second
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
new file mode 100644
index 0000000..44ba78e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.list
@@ -0,0 +1 @@
+Main.class
diff --git a/tools/art b/tools/art
index 304a9d0..d91b451 100644
--- a/tools/art
+++ b/tools/art
@@ -75,6 +75,7 @@
 ANDROID_ROOT=$PROG_DIR/..
 LIBDIR=$(find_libdir)
 LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
+DEBUG_OPTION=""
 
 DELETE_ANDROID_DATA=false
 # If ANDROID_DATA is the system ANDROID_DATA or is not set, use our own,
@@ -87,6 +88,7 @@
 
 if [ z"$PERF" != z ]; then
   invoke_with="perf record -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
+  DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
 fi
 
 # We use the PIC core image to work with perf.
@@ -99,7 +101,7 @@
     -XXlib:$LIBART \
     -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core-optimizing-pic.art \
-    -Xcompiler-option --generate-debug-info \
+    $DEBUG_OPTION \
     "$@"
 
 EXIT_STATUS=$?
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 2cb2c50..6ea83d2 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -27,7 +27,10 @@
   description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
   result: EXEC_FAILED,
   modes: [host],
-  names: ["libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
+  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+          "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
+          "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
+          "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
           "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
           "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
   bug: 26507762