Merge "Refactor DexFile::DecodeDebugInfo."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index a93d8a8..43e1457 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -206,6 +206,7 @@
 ART_C_INCLUDES := \
   external/gtest/include \
   external/icu/icu4c/source/common \
+  external/lz4/lib \
   external/valgrind/include \
   external/valgrind \
   external/vixl/src \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 348eabd..f0bf499 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -92,7 +92,6 @@
 	optimizing/parallel_move_resolver.cc \
 	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
-	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
 	optimizing/sharpening.cc \
@@ -258,9 +257,9 @@
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libart
+      LOCAL_STATIC_LIBRARIES += libart liblz4
     else
-      LOCAL_SHARED_LIBRARIES += libart
+      LOCAL_SHARED_LIBRARIES += libart liblz4
     endif
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
@@ -268,9 +267,9 @@
   else # debug
     LOCAL_MODULE := libartd-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libartd
+      LOCAL_STATIC_LIBRARIES += libartd liblz4
     else
-      LOCAL_SHARED_LIBRARIES += libartd
+      LOCAL_SHARED_LIBRARIES += libartd liblz4
     endif
   endif
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index e6cc50c..638b897 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -189,17 +189,29 @@
     }
 
     timer_.reset(new CumulativeLogger("Compilation times"));
-    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                              verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              compiler_kind_, instruction_set,
-                                              instruction_set_features_.get(),
-                                              true,
-                                              GetImageClasses(),
-                                              GetCompiledClasses(),
-                                              GetCompiledMethods(),
-                                              2, true, true, "", false, timer_.get(), -1, ""));
+    CreateCompilerDriver(compiler_kind_, instruction_set);
   }
+}
+
+void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa) {
+  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                            verification_results_.get(),
+                                            method_inliner_map_.get(),
+                                            kind,
+                                            isa,
+                                            instruction_set_features_.get(),
+                                            true,
+                                            GetImageClasses(),
+                                            GetCompiledClasses(),
+                                            GetCompiledMethods(),
+                                            2,
+                                            true,
+                                            true,
+                                            "",
+                                            false,
+                                            timer_.get(),
+                                            -1,
+                                            ""));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 1b57b7d..b491946 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -90,6 +90,8 @@
                             const char* method_name, const char* signature)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa);
+
   void ReserveImageSpace();
 
   void UnreserveImageSpace();
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 39f8ee8..18ce563 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1430,8 +1430,4 @@
                                  method_lowering_infos_.data(), count);
 }
 
-bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
-  return cu_->compiler_driver->SkipCompilation(methodname);
-}
-
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 2da8a98..3191fe9 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -564,11 +564,6 @@
   bool SkipCompilation(std::string* skip_message);
 
   /*
-   * Should we skip the compilation of this method based on its name?
-   */
-  bool SkipCompilationByName(const std::string& methodname);
-
-  /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
    * actually the index of the method in the m_units_ array).
    */
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index eaf2408..f48947d 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -50,6 +50,23 @@
     true,   // kIntrinsicMinMaxLong
     true,   // kIntrinsicMinMaxFloat
     true,   // kIntrinsicMinMaxDouble
+    true,   // kIntrinsicCos
+    true,   // kIntrinsicSin
+    true,   // kIntrinsicAcos
+    true,   // kIntrinsicAsin
+    true,   // kIntrinsicAtan
+    true,   // kIntrinsicAtan2
+    true,   // kIntrinsicCbrt
+    true,   // kIntrinsicCosh
+    true,   // kIntrinsicExp
+    true,   // kIntrinsicExpm1
+    true,   // kIntrinsicHypot
+    true,   // kIntrinsicLog
+    true,   // kIntrinsicLog10
+    true,   // kIntrinsicNextAfter
+    true,   // kIntrinsicSinh
+    true,   // kIntrinsicTan
+    true,   // kIntrinsicTanh
     true,   // kIntrinsicSqrt
     true,   // kIntrinsicCeil
     true,   // kIntrinsicFloor
@@ -95,6 +112,23 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAsin], "Asin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan], "Atan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan2], "Atan2 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCbrt], "Cbrt must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCosh], "Cosh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExp], "Exp must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExpm1], "Expm1 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicHypot], "Hypot must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog], "Log must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog10], "Log10 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNextAfter], "NextAfter must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSinh], "Sinh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTan], "Tan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTanh], "Tanh must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSqrt], "Sqrt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCeil], "Ceil must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicFloor], "Floor must be static");
@@ -196,6 +230,23 @@
     "abs",                   // kNameCacheAbs
     "max",                   // kNameCacheMax
     "min",                   // kNameCacheMin
+    "cos",                   // kNameCacheCos
+    "sin",                   // kNameCacheSin
+    "acos",                  // kNameCacheAcos
+    "asin",                  // kNameCacheAsin
+    "atan",                  // kNameCacheAtan
+    "atan2",                 // kNameCacheAtan2
+    "cbrt",                  // kNameCacheCbrt
+    "cosh",                  // kNameCacheCosh
+    "exp",                   // kNameCacheExp
+    "expm1",                 // kNameCacheExpm1
+    "hypot",                 // kNameCacheHypot
+    "log",                   // kNameCacheLog
+    "log10",                 // kNameCacheLog10
+    "nextAfter",             // kNameCacheNextAfter
+    "sinh",                  // kNameCacheSinh
+    "tan",                   // kNameCacheTan
+    "tanh",                  // kNameCacheTanh
     "sqrt",                  // kNameCacheSqrt
     "ceil",                  // kNameCacheCeil
     "floor",                 // kNameCacheFloor
@@ -425,6 +476,23 @@
     INTRINSIC(JavaLangMath,       Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
     INTRINSIC(JavaLangStrictMath, Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
 
+    INTRINSIC(JavaLangMath,       Cos, D_D, kIntrinsicCos, 0),
+    INTRINSIC(JavaLangMath,       Sin, D_D, kIntrinsicSin, 0),
+    INTRINSIC(JavaLangMath,       Acos, D_D, kIntrinsicAcos, 0),
+    INTRINSIC(JavaLangMath,       Asin, D_D, kIntrinsicAsin, 0),
+    INTRINSIC(JavaLangMath,       Atan, D_D, kIntrinsicAtan, 0),
+    INTRINSIC(JavaLangMath,       Atan2, DD_D, kIntrinsicAtan2, 0),
+    INTRINSIC(JavaLangMath,       Cbrt, D_D, kIntrinsicCbrt, 0),
+    INTRINSIC(JavaLangMath,       Cosh, D_D, kIntrinsicCosh, 0),
+    INTRINSIC(JavaLangMath,       Exp, D_D, kIntrinsicExp, 0),
+    INTRINSIC(JavaLangMath,       Expm1, D_D, kIntrinsicExpm1, 0),
+    INTRINSIC(JavaLangMath,       Hypot, DD_D, kIntrinsicHypot, 0),
+    INTRINSIC(JavaLangMath,       Log, D_D, kIntrinsicLog, 0),
+    INTRINSIC(JavaLangMath,       Log10, D_D, kIntrinsicLog10, 0),
+    INTRINSIC(JavaLangMath,       NextAfter, DD_D, kIntrinsicNextAfter, 0),
+    INTRINSIC(JavaLangMath,       Sinh, D_D, kIntrinsicSinh, 0),
+    INTRINSIC(JavaLangMath,       Tan, D_D, kIntrinsicTan, 0),
+    INTRINSIC(JavaLangMath,       Tanh, D_D, kIntrinsicTanh, 0),
     INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
     INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
 
@@ -603,6 +671,25 @@
       return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_double */);
     case kIntrinsicMinMaxDouble:
       return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */);
+    case kIntrinsicCos:
+    case kIntrinsicSin:
+    case kIntrinsicAcos:
+    case kIntrinsicAsin:
+    case kIntrinsicAtan:
+    case kIntrinsicAtan2:
+    case kIntrinsicCbrt:
+    case kIntrinsicCosh:
+    case kIntrinsicExp:
+    case kIntrinsicExpm1:
+    case kIntrinsicHypot:
+    case kIntrinsicLog:
+    case kIntrinsicLog10:
+    case kIntrinsicNextAfter:
+    case kIntrinsicSinh:
+    case kIntrinsicTan:
+    case kIntrinsicTanh:
+      // Not implemented in Quick.
+      return false;
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
     case kIntrinsicCeil:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 5ce110c..ac70577 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -162,6 +162,23 @@
       kNameCacheAbs,
       kNameCacheMax,
       kNameCacheMin,
+      kNameCacheCos,
+      kNameCacheSin,
+      kNameCacheAcos,
+      kNameCacheAsin,
+      kNameCacheAtan,
+      kNameCacheAtan2,
+      kNameCacheCbrt,
+      kNameCacheCosh,
+      kNameCacheExp,
+      kNameCacheExpm1,
+      kNameCacheHypot,
+      kNameCacheLog,
+      kNameCacheLog10,
+      kNameCacheNextAfter,
+      kNameCacheSinh,
+      kNameCacheTan,
+      kNameCacheTanh,
       kNameCacheSqrt,
       kNameCacheCeil,
       kNameCacheFloor,
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 05dde9f..3260a7a 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -780,14 +780,6 @@
   PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu);
   pass_driver.Launch();
 
-  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
-  if (cu.compiler_driver->ProfilePresent()
-      && !cu.mir_graph->MethodIsLeaf()
-      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
-    cu.EndTiming();
-    return nullptr;
-  }
-
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 03bda78..2532bda 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -22,14 +22,10 @@
 
 namespace art {
 
-bool QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
-  bool result = verification_results_->ProcessVerifiedMethod(verifier);
-  if (result) {
-    MethodReference ref = verifier->GetMethodReference();
-    method_inliner_map_->GetMethodInliner(ref.dex_file)
-        ->AnalyseMethodCode(verifier);
-  }
-  return result;
+void QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
+  verification_results_->ProcessVerifiedMethod(verifier);
+  MethodReference ref = verifier->GetMethodReference();
+  method_inliner_map_->GetMethodInliner(ref.dex_file)->AnalyseMethodCode(verifier);
 }
 
 void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 03bf57b..4f5ea76 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -37,7 +37,7 @@
 
     ~QuickCompilerCallbacks() { }
 
-    bool MethodVerified(verifier::MethodVerifier* verifier)
+    void MethodVerified(verifier::MethodVerifier* verifier)
         SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE;
 
     void ClassRejected(ClassReference ref) OVERRIDE;
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 65b0ad6..dd24220 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -44,14 +44,14 @@
   }
 }
 
-bool VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
+void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
   DCHECK(method_verifier != nullptr);
   MethodReference ref = method_verifier->GetMethodReference();
   bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
   const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
   if (verified_method == nullptr) {
-    // Do not report an error to the verifier. We'll just punt this later.
-    return true;
+    // We'll punt this later.
+    return;
   }
 
   WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
@@ -69,11 +69,10 @@
     // is unsafe to replace the existing one since the JIT may be using it to generate a
     // native GC map.
     delete verified_method;
-    return true;
+    return;
   }
   verified_methods_.Put(ref, verified_method);
   DCHECK(verified_methods_.find(ref) != verified_methods_.end());
-  return true;
 }
 
 const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 9934f6b..da80bf0 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -42,7 +42,7 @@
     explicit VerificationResults(const CompilerOptions* compiler_options);
     ~VerificationResults();
 
-    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+    void ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
         SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(!verified_methods_lock_);
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a05105b..ba8f1d0 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -80,6 +80,9 @@
 // given, too all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
 
+// Print additional info during profile guided compilation.
+static constexpr bool kDebugProfileGuidedCompilation = false;
+
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -344,8 +347,7 @@
                                const std::string& dump_cfg_file_name, bool dump_cfg_append,
                                CumulativeLogger* timer, int swap_fd,
                                const std::string& profile_file)
-    : profile_present_(false),
-      compiler_options_(compiler_options),
+    : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -383,12 +385,8 @@
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
-    profile_present_ = profile_file_.LoadFile(profile_file);
-    if (profile_present_) {
-      LOG(INFO) << "Using profile data form file " << profile_file;
-    } else {
-      LOG(INFO) << "Failed to load profile file " << profile_file;
-    }
+    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
+    LOG(INFO) << "Using profile data from file " << profile_file;
   }
 }
 
@@ -569,7 +567,9 @@
         (verified_method->GetEncounteredVerificationFailures() &
             (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
         // Is eligable for compilation by methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref);
+        driver->IsMethodToCompile(method_ref) &&
+        driver->ShouldCompileBasedOnProfile(method_ref);
+
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return null.
       compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
@@ -766,6 +766,22 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
+bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
+  if (profile_compilation_info_ == nullptr) {
+    // If we miss profile information it means that we don't do a profile guided compilation.
+    // Return true, and let the other filters decide if the method should be compiled.
+    return true;
+  }
+  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] "
+        << (result ? "Compiled" : "Skipped") << " method:"
+        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+  }
+  return result;
+}
+
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
   ResolveCatchBlockExceptionsClassVisitor(
@@ -1134,15 +1150,21 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsBootImage()) {
-    // We resolve all const-string strings when building for the image.
+  if (IsBootImage() || Runtime::Current()->UseJit()) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
         soa.Self(), dex_file, false)));
-    class_linker->ResolveString(dex_file, string_idx, dex_cache);
-    result = true;
+    if (IsBootImage()) {
+      // We resolve all const-string strings when building for the image.
+      class_linker->ResolveString(dex_file, string_idx, dex_cache);
+      result = true;
+    } else {
+      // Just check whether the dex cache already has the string.
+      DCHECK(Runtime::Current()->UseJit());
+      result = (dex_cache->GetResolvedString(string_idx) != nullptr);
+    }
   }
   if (result) {
     stats_->StringInDexCache();
@@ -2037,6 +2059,7 @@
                                                 dex_cache,
                                                 class_loader,
                                                 &class_def,
+                                                Runtime::Current()->GetCompilerCallbacks(),
                                                 true /* allow soft failures */,
                                                 true /* log hard failures */,
                                                 &error_msg) ==
@@ -2273,6 +2296,16 @@
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
+  if (profile_compilation_info_ != nullptr) {
+    if (!profile_compilation_info_->Load(dex_files)) {
+      LOG(WARNING) << "Failed to load offline profile info from "
+          << profile_compilation_info_->GetFilename()
+          << ". No methods will be compiled";
+    } else if (kDebugProfileGuidedCompilation) {
+      LOG(INFO) << "[ProfileGuidedCompilation] "
+          << profile_compilation_info_->DumpInfo();
+    }
+  }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
@@ -2510,39 +2543,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::SkipCompilation(const std::string& method_name) {
-  if (!profile_present_) {
-    return false;
-  }
-  // First find the method in the profile file.
-  ProfileFile::ProfileData data;
-  if (!profile_file_.GetProfileData(&data, method_name)) {
-    // Not in profile, no information can be determined.
-    if (kIsDebugBuild) {
-      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
-    }
-    return true;
-  }
-
-  // Methods that comprise top_k_threshold % of the total samples will be compiled.
-  // Compare against the start of the topK percentage bucket just in case the threshold
-  // falls inside a bucket.
-  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
-                 <= compiler_options_->GetTopKProfileThreshold();
-  if (kIsDebugBuild) {
-    if (compile) {
-      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-    } else {
-      VLOG(compiler) << "not compiling method " << method_name
-          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
-          << "% samples)";
-    }
-  }
-  return !compile;
-}
-
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 1347b37..f0360ce 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -31,11 +31,11 @@
 #include "compiler.h"
 #include "dex_file.h"
 #include "driver/compiled_method_storage.h"
+#include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -147,10 +147,6 @@
     return compiler_.get();
   }
 
-  bool ProfilePresent() const {
-    return profile_present_;
-  }
-
   // Are we compiling and creating an image file?
   bool IsBootImage() const {
     return boot_image_;
@@ -161,16 +157,11 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateJniDlsymLookup() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  const std::vector<uint8_t>* CreateJniDlsymLookup() const;
+  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const;
 
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
@@ -445,6 +436,10 @@
   // Checks whether the provided method should be compiled, i.e., is in method_to_compile_.
   bool IsMethodToCompile(const MethodReference& method_ref) const;
 
+  // Checks whether profile guided compilation is enabled and if the method should be compiled
+  // according to the profile file.
+  bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const;
+
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       REQUIRES(!compiled_classes_lock_);
 
@@ -454,9 +449,6 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
-  // Should the compiler run on this method given profile information?
-  bool SkipCompilation(const std::string& method_name);
-
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
@@ -595,9 +587,6 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  ProfileFile profile_file_;
-  bool profile_present_;
-
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -647,6 +636,9 @@
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
+  // Info for profile guided compilation.
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
+
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a24c8a3..4d2d924 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -117,7 +117,7 @@
 }
 
 void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
-  ParseUintOption(option, "--inline-max-code-units=", &inline_max_code_units_, Usage);
+  ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
 }
 
 void CompilerOptions::ParseDisablePasses(const StringPiece& option,
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 01533eb..06553a6 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -436,6 +436,7 @@
       info_.StartTag(DW_TAG_compile_unit);
       info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
+      info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
       info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
       info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index cda6240..15812dc 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -43,10 +43,17 @@
     ReserveImageSpace();
     CommonCompilerTest::SetUp();
   }
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
 };
 
-TEST_F(ImageTest, WriteRead) {
-  TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING();
+void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  // TODO: Test does not currently work with optimizing.
+  CreateCompilerDriver(Compiler::kQuick, kRuntimeISA);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file->EnableWrite();
+  }
   // Create a generic location tmp file, to be the base of the .art and .oat temporary files.
   ScratchFile location;
   ScratchFile image_location(location, ".art");
@@ -68,17 +75,14 @@
   std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
                                                       requested_image_base,
                                                       /*compile_pic*/false,
-                                                      /*compile_app_image*/false));
+                                                      /*compile_app_image*/false,
+                                                      storage_mode));
   // TODO: compile_pic should be a test argument.
   {
     {
       jobject class_loader = nullptr;
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
-      for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
-        dex_file->EnableWrite();
-      }
       compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
@@ -113,14 +117,9 @@
       elf_writer->EndText(text);
 
       elf_writer->SetBssSize(oat_writer.GetBssSize());
-
       elf_writer->WriteDynamicSection();
-
-      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
-      elf_writer->WriteDebugInfo(method_infos);
-
-      ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
-      elf_writer->WritePatchLocations(patch_locations);
+      elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+      elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
       success = elf_writer->End();
 
@@ -209,7 +208,13 @@
 
   gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
   ASSERT_TRUE(image_space != nullptr);
-  ASSERT_LE(image_space->Size(), image_file_size);
+  if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+    // Uncompressed, image should be smaller than file.
+    ASSERT_LE(image_space->Size(), image_file_size);
+  } else {
+    // Compressed, file should be smaller than image.
+    ASSERT_LE(image_file_size, image_space->Size());
+  }
 
   image_space->VerifyImageAllocations();
   uint8_t* image_begin = image_space->Begin();
@@ -237,6 +242,14 @@
   CHECK_EQ(0, rmdir_result);
 }
 
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
 TEST_F(ImageTest, ImageHeaderIsValid) {
     uint32_t image_begin = ART_BASE_ADDRESS;
     uint32_t image_size_ = 16 * KB;
@@ -257,7 +270,9 @@
                              oat_data_end,
                              oat_file_end,
                              sizeof(void*),
-                             /*compile_pic*/false);
+                             /*compile_pic*/false,
+                             ImageHeader::kDefaultStorageMode,
+                             /*data_size*/0u);
     ASSERT_TRUE(image_header.IsValid());
 
     char* magic = const_cast<char*>(image_header.GetMagic());
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index bf1fcdd..fce08ea 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -17,6 +17,7 @@
 #include "image_writer.h"
 
 #include <sys/stat.h>
+#include <lz4.h>
 
 #include <memory>
 #include <numeric>
@@ -225,27 +226,72 @@
     return EXIT_FAILURE;
   }
 
-  // Write out the image + fields + methods.
+  std::unique_ptr<char[]> compressed_data;
+  // Image data size excludes the bitmap and the header.
   ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  const auto write_count = image_header->GetImageSize();
-  if (!image_file->WriteFully(image_->Begin(), write_count)) {
-    PLOG(ERROR) << "Failed to write image file " << image_filename;
+  const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader);
+  char* image_data = reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader);
+  size_t data_size;
+  const char* image_data_to_write;
+
+  CHECK_EQ(image_header->storage_mode_, image_storage_mode_);
+  switch (image_storage_mode_) {
+    case ImageHeader::kStorageModeLZ4: {
+      size_t compressed_max_size = LZ4_compressBound(image_data_size);
+      compressed_data.reset(new char[compressed_max_size]);
+      data_size = LZ4_compress(
+          reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader),
+          &compressed_data[0],
+          image_data_size);
+      image_data_to_write = &compressed_data[0];
+      VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size;
+      break;
+    }
+    case ImageHeader::kStorageModeUncompressed: {
+      data_size = image_data_size;
+      image_data_to_write = image_data;
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
+    }
+  }
+
+  // Write header first, as uncompressed.
+  image_header->data_size_ = data_size;
+  if (!image_file->WriteFully(image_->Begin(), sizeof(ImageHeader))) {
+    PLOG(ERROR) << "Failed to write image file header " << image_filename;
     image_file->Erase();
     return false;
   }
 
-  // Write out the image bitmap at the page aligned start of the image end.
+  // Write out the image + fields + methods.
+  const bool is_compressed = compressed_data != nullptr;
+  if (!image_file->WriteFully(image_data_to_write, data_size)) {
+    PLOG(ERROR) << "Failed to write image file data " << image_filename;
+    image_file->Erase();
+    return false;
+  }
+
+  // Write out the image bitmap at the page aligned start of the image end, also uncompressed for
+  // convenience.
   const ImageSection& bitmap_section = image_header->GetImageSection(
       ImageHeader::kSectionImageBitmap);
-  CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+  // Align up since data size may be unaligned if the image is compressed.
+  size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
+  if (!is_compressed) {
+    CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
+  }
   if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
-                         bitmap_section.Size(), bitmap_section.Offset())) {
+                         bitmap_section.Size(),
+                         bitmap_position_in_file)) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
     image_file->Erase();
     return false;
   }
-
-  CHECK_EQ(bitmap_section.End(), static_cast<size_t>(image_file->GetLength()));
+  CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
+           static_cast<size_t>(image_file->GetLength()));
   if (image_file->FlushCloseOrErase() != 0) {
     PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
     return false;
@@ -1175,8 +1221,11 @@
     // Compiling the boot image, add null class loader.
     class_loaders_.insert(nullptr);
   }
-  if (!class_loaders_.empty()) {
-    CHECK_EQ(class_loaders_.size(), 1u) << "Should only have one real class loader in the image";
+  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
+  // with no classes.
+  if (class_loaders_.size() == 1u) {
+    // Only write the class table if we have exactly one class loader. There may be cases where
+    // there are multiple class loaders if a class path is passed to dex2oat.
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
     for (mirror::ClassLoader* loader : class_loaders_) {
       ClassTable* table = class_linker->ClassTableForClassLoader(loader);
@@ -1247,7 +1296,8 @@
   }
   CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
       "Oat file should be right after the image.";
-  // Create the header.
+  // Create the header, leave 0 for data size since we will fill this in as we are writing the
+  // image.
   new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_),
                                                           image_end,
                                                           sections,
@@ -1258,7 +1308,9 @@
                                                           PointerToLowMemUInt32(oat_data_end),
                                                           PointerToLowMemUInt32(oat_file_end),
                                                           target_ptr_size_,
-                                                          compile_pic_);
+                                                          compile_pic_,
+                                                          image_storage_mode_,
+                                                          /*data_size*/0u);
 }
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
@@ -1375,28 +1427,32 @@
   CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
   temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
 
-  // Write the class table(s) into the image.
-  ClassLinker* const class_linker = runtime->GetClassLinker();
-  const ImageSection& class_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionClassTable);
-  uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
-  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  size_t class_table_bytes = 0;
-  for (mirror::ClassLoader* loader : class_loaders_) {
-    ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-    CHECK(table != nullptr);
-    uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
-    class_table_bytes += table->WriteToMemory(memory_ptr);
-    // Fixup the pointers in the newly written class table to contain image addresses. See
-    // above comment for intern tables.
-    ClassTable temp_class_table;
-    temp_class_table.ReadFromMemory(memory_ptr);
-    // CHECK_EQ(temp_class_table.NumNonZygoteClasses(), table->NumNonZygoteClasses());
-    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                    RootInfo(kRootUnknown));
-    temp_class_table.VisitRoots(buffered_visitor);
+  // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
+  // class loaders. Writing multiple class tables into the image is currently unsupported.
+  if (class_table_bytes_ > 0u) {
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    const ImageSection& class_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionClassTable);
+    uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    size_t class_table_bytes = 0;
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
+      class_table_bytes += table->WriteToMemory(memory_ptr);
+      // Fixup the pointers in the newly written class table to contain image addresses. See
+      // above comment for intern tables.
+      ClassTable temp_class_table;
+      temp_class_table.ReadFromMemory(memory_ptr);
+      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+               table->NumZygoteClasses());
+      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                      RootInfo(kRootUnknown));
+      temp_class_table.VisitRoots(buffered_visitor);
+    }
+    CHECK_EQ(class_table_bytes, class_table_bytes_);
   }
-  CHECK_EQ(class_table_bytes, class_table_bytes_);
 }
 
 void ImageWriter::CopyAndFixupObjects() {
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 386838f..8e930f0 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -30,6 +30,7 @@
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
 #include "gc/space/space.h"
+#include "image.h"
 #include "length_prefixed_array.h"
 #include "lock_word.h"
 #include "mem_map.h"
@@ -54,7 +55,8 @@
   ImageWriter(const CompilerDriver& compiler_driver,
               uintptr_t image_begin,
               bool compile_pic,
-              bool compile_app_image)
+              bool compile_app_image,
+              ImageHeader::StorageMode image_storage_mode)
       : compiler_driver_(compiler_driver),
         image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
         image_end_(0),
@@ -73,7 +75,8 @@
         image_method_array_(ImageHeader::kImageMethodsCount),
         dirty_methods_(0u),
         clean_methods_(0u),
-        class_table_bytes_(0u) {
+        class_table_bytes_(0u),
+        image_storage_mode_(image_storage_mode) {
     CHECK_NE(image_begin, 0U);
     std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
     std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
@@ -454,12 +457,17 @@
   // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
   std::unordered_map<mirror::Class*, bool> prune_class_memo_;
 
-  // Class loaders with a class table to write out. Should only be one currently.
+  // Class loaders with a class table to write out. There should only be one class loader because
+  // dex2oat loads the dex files to be compiled into a single class loader. For the boot image,
+  // null is a valid entry.
   std::unordered_set<mirror::ClassLoader*> class_loaders_;
 
   // Number of image class table bytes.
   size_t class_table_bytes_;
 
+  // Which mode the image is stored as, see image.h
+  const ImageHeader::StorageMode image_storage_mode_;
+
   friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f3bda2f..5ab55e0 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -219,7 +219,8 @@
   // calling through stub will link with &Java_MyClassNatives_bar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -233,7 +234,8 @@
   // calling through stub will link with &Java_MyClassNatives_sbar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b8610d0..eea5204 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -157,14 +157,9 @@
     elf_writer->EndText(text);
 
     elf_writer->SetBssSize(oat_writer.GetBssSize());
-
     elf_writer->WriteDynamicSection();
-
-    ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
-    elf_writer->WriteDebugInfo(method_infos);
-
-    ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
-    elf_writer->WritePatchLocations(patch_locations);
+    elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+    elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
     return elf_writer->End();
   }
@@ -260,7 +255,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(114 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -269,14 +264,9 @@
     std::unique_ptr<const InstructionSetFeatures> insn_features(
         InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
     ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-    std::vector<const DexFile*> dex_files;
-    uint32_t image_file_location_oat_checksum = 0;
-    uint32_t image_file_location_oat_begin = 0;
     std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
                                                             insn_features.get(),
-                                                            &dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_begin,
+                                                            0u,
                                                             nullptr));
     ASSERT_NE(oat_header.get(), nullptr);
     ASSERT_TRUE(oat_header->IsValid());
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index e8e775f..2b2f0e8 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -52,6 +52,97 @@
 
 namespace art {
 
+class OatWriter::OatClass {
+ public:
+  OatClass(size_t offset,
+           const dchecked_vector<CompiledMethod*>& compiled_methods,
+           uint32_t num_non_null_compiled_methods,
+           mirror::Class::Status status);
+  OatClass(OatClass&& src) = default;
+  size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
+  size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+
+  CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
+    return compiled_methods_[class_def_method_index];
+  }
+
+  // Offset of start of OatClass from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // CompiledMethods for each class_def_method_index, or null if no method is available.
+  dchecked_vector<CompiledMethod*> compiled_methods_;
+
+  // Offset from OatClass::offset_ to the OatMethodOffsets for the
+  // class_def_method_index. If 0, it means the corresponding
+  // CompiledMethod entry in OatClass::compiled_methods_ should be
+  // null and that the OatClass::type_ should be kOatClassBitmap.
+  dchecked_vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
+
+  // Data to write.
+
+  static_assert(mirror::Class::Status::kStatusMax < (1 << 16), "class status won't fit in 16bits");
+  int16_t status_;
+
+  static_assert(OatClassType::kOatClassMax < (1 << 16), "oat_class type won't fit in 16bits");
+  uint16_t type_;
+
+  uint32_t method_bitmap_size_;
+
+  // bit vector indexed by ClassDef method index. When
+  // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
+  // method has an OatMethodOffsets in methods_offsets_, otherwise
+  // the entry was ommited to save space. If OatClassType::type_ is
+  // not is kOatClassBitmap, the bitmap will be null.
+  std::unique_ptr<BitVector> method_bitmap_;
+
+  // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
+  // present in the OatClass. Note that some may be missing if
+  // OatClass::compiled_methods_ contains null values (and
+  // oat_method_offsets_offsets_from_oat_class_ should contain 0
+  // values in this case).
+  dchecked_vector<OatMethodOffsets> method_offsets_;
+  dchecked_vector<OatQuickMethodHeader> method_headers_;
+
+ private:
+  size_t GetMethodOffsetsRawSize() const {
+    return method_offsets_.size() * sizeof(method_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatClass);
+};
+
+class OatWriter::OatDexFile {
+ public:
+  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(OatDexFile&& src) = default;
+
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+
+  // Offset of start of OatDexFile from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // Data to write.
+  uint32_t dex_file_location_size_;
+  const uint8_t* dex_file_location_data_;
+  uint32_t dex_file_location_checksum_;
+  uint32_t dex_file_offset_;
+  uint32_t lookup_table_offset_;
+  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+  dchecked_vector<uint32_t> class_offsets_;
+
+ private:
+  size_t GetClassOffsetsRawSize() const {
+    return class_offsets_.size() * sizeof(class_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatDexFile);
+};
+
 #define DCHECK_OFFSET() \
   DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -106,14 +197,14 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
-    size_oat_dex_file_methods_offsets_(0),
+    size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_dex_file_class_offsets_(0),
+    size_oat_lookup_table_alignment_(0),
+    size_oat_lookup_table_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
-    size_oat_lookup_table_alignment_(0),
-    size_oat_lookup_table_offset_(0),
-    size_oat_lookup_table_(0),
     method_offset_map_() {
   CHECK(key_value_store != nullptr);
   if (compiling_boot_image) {
@@ -180,9 +271,6 @@
 }
 
 OatWriter::~OatWriter() {
-  delete oat_header_;
-  STLDeleteElements(&oat_dex_files_);
-  STLDeleteElements(&oat_classes_);
 }
 
 struct OatWriter::GcMapDataAccess {
@@ -326,6 +414,11 @@
     : DexMethodVisitor(writer, offset),
       compiled_methods_(),
       num_non_null_compiled_methods_(0u) {
+    size_t num_classes = 0u;
+    for (const OatDexFile& oat_dex_file : writer_->oat_dex_files_) {
+      num_classes += oat_dex_file.class_offsets_.size();
+    }
+    writer_->oat_classes_.reserve(num_classes);
     compiled_methods_.reserve(256u);
   }
 
@@ -364,16 +457,16 @@
       status = mirror::Class::kStatusNotReady;
     }
 
-    OatClass* oat_class = new OatClass(offset_, compiled_methods_,
-                                       num_non_null_compiled_methods_, status);
-    writer_->oat_classes_.push_back(oat_class);
-    oat_class->UpdateChecksum(writer_->oat_header_);
-    offset_ += oat_class->SizeOf();
+    writer_->oat_classes_.emplace_back(offset_,
+                                       compiled_methods_,
+                                       num_non_null_compiled_methods_,
+                                       status);
+    offset_ += writer_->oat_classes_.back().SizeOf();
     return DexMethodVisitor::EndClass();
   }
 
  private:
-  std::vector<CompiledMethod*> compiled_methods_;
+  dchecked_vector<CompiledMethod*> compiled_methods_;
   size_t num_non_null_compiled_methods_;
 };
 
@@ -396,7 +489,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
@@ -583,7 +676,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
@@ -600,7 +693,6 @@
           DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
           dedupe_map_.PutBefore(lb, map.data(), offset_);
           offset_ += map_size;
-          writer_->oat_header_->UpdateChecksum(&map[0], map_size);
         }
       }
       ++method_offsets_index_;
@@ -624,7 +716,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     OatMethodOffsets offsets(0u);
@@ -715,7 +807,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     // No thread suspension since dex_cache_ that may get invalidated if that occurs.
@@ -752,8 +844,7 @@
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
-        writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
-        if (!out->WriteFully(&method_header, sizeof(method_header))) {
+        if (!writer_->WriteData(out, &method_header, sizeof(method_header))) {
           ReportWriteFailure("method header", it);
           return false;
         }
@@ -790,8 +881,7 @@
           }
         }
 
-        writer_->oat_header_->UpdateChecksum(quick_code.data(), code_size);
-        if (!out->WriteFully(quick_code.data(), code_size)) {
+        if (!writer_->WriteData(out, quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -945,7 +1035,7 @@
   }
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {  // ie. not an abstract method
@@ -963,7 +1053,7 @@
           << map_size << " " << map_offset << " " << offset_ << " "
           << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
       if (map_size != 0u && map_offset == offset_) {
-        if (UNLIKELY(!out->WriteFully(&map[0], map_size))) {
+        if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
           ReportWriteFailure(it);
           return false;
         }
@@ -1028,12 +1118,12 @@
 }
 
 size_t OatWriter::InitOatHeader() {
-  oat_header_ = OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                  compiler_driver_->GetInstructionSetFeatures(),
-                                  dex_files_,
-                                  image_file_location_oat_checksum_,
-                                  image_file_location_oat_begin_,
-                                  key_value_store_);
+  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
+                                      compiler_driver_->GetInstructionSetFeatures(),
+                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
+                                      key_value_store_));
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
 
   return oat_header_->GetHeaderSize();
 }
@@ -1043,9 +1133,8 @@
   for (size_t i = 0; i != dex_files_->size(); ++i) {
     const DexFile* dex_file = (*dex_files_)[i];
     CHECK(dex_file != nullptr);
-    OatDexFile* oat_dex_file = new OatDexFile(offset, *dex_file);
-    oat_dex_files_.push_back(oat_dex_file);
-    offset += oat_dex_file->SizeOf();
+    oat_dex_files_.emplace_back(offset, *dex_file);
+    offset += oat_dex_files_.back().SizeOf();
   }
   return offset;
 }
@@ -1059,12 +1148,12 @@
     size_dex_file_alignment_ += offset - original_offset;
 
     // set offset in OatDexFile to DexFile
-    oat_dex_files_[i]->dex_file_offset_ = offset;
+    oat_dex_files_[i].dex_file_offset_ = offset;
 
     const DexFile* dex_file = (*dex_files_)[i];
 
     // Initialize type lookup table
-    oat_dex_files_[i]->lookup_table_ = dex_file->GetTypeLookupTable();
+    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
 
     offset += dex_file->GetHeader().file_size_;
   }
@@ -1072,14 +1161,14 @@
 }
 
 size_t OatWriter::InitLookupTables(size_t offset) {
-  for (OatDexFile* oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file->lookup_table_ != nullptr) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.lookup_table_ != nullptr) {
       uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file->lookup_table_offset_ = aligned_offset;
+      oat_dex_file.lookup_table_offset_ = aligned_offset;
       size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file->lookup_table_->RawDataLength();
+      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
     } else {
-      oat_dex_file->lookup_table_offset_ = 0;
+      oat_dex_file.lookup_table_offset_ = 0;
     }
   }
   return offset;
@@ -1094,13 +1183,12 @@
 
   // Update oat_dex_files_.
   auto oat_class_it = oat_classes_.begin();
-  for (OatDexFile* oat_dex_file : oat_dex_files_) {
-    for (uint32_t& method_offset : oat_dex_file->methods_offsets_) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    for (uint32_t& class_offset : oat_dex_file.class_offsets_) {
       DCHECK(oat_class_it != oat_classes_.end());
-      method_offset = (*oat_class_it)->offset_;
+      class_offset = oat_class_it->offset_;
       ++oat_class_it;
     }
-    oat_dex_file->UpdateChecksum(oat_header_);
   }
   CHECK(oat_class_it == oat_classes_.end());
 
@@ -1184,17 +1272,14 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
-  if (raw_file_offset == (off_t) -1) {
-    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+  if (!GetOatDataOffset(out)) {
     return false;
   }
-  const size_t file_offset = static_cast<size_t>(raw_file_offset);
-  oat_data_offset_ = file_offset;
+  const size_t file_offset = oat_data_offset_;
 
   // Reserve space for header. It will be written last - after updating the checksum.
   size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == (off_t) -1) {
+  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
     return false;
   }
@@ -1207,7 +1292,7 @@
   }
 
   off_t tables_end_offset = out->Seek(0, kSeekCurrent);
-  if (tables_end_offset == (off_t) -1) {
+  if (tables_end_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
@@ -1252,7 +1337,7 @@
   }
 
   const off_t oat_end_file_offset = out->Seek(0, kSeekCurrent);
-  if (oat_end_file_offset == (off_t) -1) {
+  if (oat_end_file_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to get oat end file offset in " << out->GetLocation();
     return false;
   }
@@ -1288,14 +1373,14 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
-    DO_STAT(size_oat_dex_file_methods_offsets_);
+    DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_);
+    DO_STAT(size_oat_lookup_table_alignment_);
+    DO_STAT(size_oat_lookup_table_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
     DO_STAT(size_oat_class_method_offsets_);
-    DO_STAT(size_oat_lookup_table_alignment_);
-    DO_STAT(size_oat_lookup_table_offset_);
-    DO_STAT(size_oat_lookup_table_);
     #undef DO_STAT
 
     VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; \
@@ -1306,17 +1391,20 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
+  // Finalize the header checksum.
+  oat_header_->UpdateChecksumWithHeaderData();
+
   // Write the header now that the checksum is final.
-  if (out->Seek(file_offset, kSeekSet) == (off_t) -1) {
+  if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
-  if (!out->WriteFully(oat_header_, header_size)) {
+  if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == (off_t) -1) {
+  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
     return false;
   }
@@ -1327,13 +1415,13 @@
 
 bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
   for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i]->Write(this, out, file_offset)) {
+    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
       PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
       return false;
     }
   }
   for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i]->dex_file_offset_;
+    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
     off_t actual_offset = out->Seek(expected_offset, kSeekSet);
     if (static_cast<uint32_t>(actual_offset) != expected_offset) {
       const DexFile* dex_file = (*dex_files_)[i];
@@ -1353,7 +1441,7 @@
     return false;
   }
   for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i]->Write(this, out, file_offset)) {
+    if (!oat_classes_[i].Write(this, out, file_offset)) {
       PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
       return false;
     }
@@ -1363,8 +1451,8 @@
 
 bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
   for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i]->lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i]->lookup_table_;
+    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
+    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
     DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
     if (lookup_table_offset == 0) {
       continue;
@@ -1378,7 +1466,7 @@
       return false;
     }
     if (table != nullptr) {
-      if (!out->WriteFully(table->RawData(), table->RawDataLength())) {
+      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
         const DexFile* dex_file = (*dex_files_)[i];
         PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
                     << " to " << out->GetLocation();
@@ -1427,7 +1515,7 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!out->WriteFully(&(*field)[0], field->size())) { \
+        if (!WriteData(out, field->data(), field->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
@@ -1469,6 +1557,17 @@
   return relative_offset;
 }
 
+bool OatWriter::GetOatDataOffset(OutputStream* out) {
+  // Get the elf file offset of the oat file.
+  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
+  if (raw_file_offset == static_cast<off_t>(-1)) {
+    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+    return false;
+  }
+  oat_data_offset_ = static_cast<size_t>(raw_file_offset);
+  return true;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1481,6 +1580,11 @@
   return true;
 }
 
+bool OatWriter::WriteData(OutputStream* out, const void* data, size_t size) {
+  oat_header_->UpdateChecksum(data, size);
+  return out->WriteFully(data, size);
+}
+
 std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) {
   auto it = map.find(ref);
   if (it == map.end()) {
@@ -1498,7 +1602,7 @@
   dex_file_location_checksum_ = dex_file.GetLocationChecksum();
   dex_file_offset_ = 0;
   lookup_table_offset_ = 0;
-  methods_offsets_.resize(dex_file.NumClassDefs());
+  class_offsets_.resize(dex_file.NumClassDefs());
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1507,63 +1611,50 @@
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
           + sizeof(lookup_table_offset_)
-          + (sizeof(methods_offsets_[0]) * methods_offsets_.size());
-}
-
-void OatWriter::OatDexFile::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&dex_file_location_size_, sizeof(dex_file_location_size_));
-  oat_header->UpdateChecksum(dex_file_location_data_, dex_file_location_size_);
-  oat_header->UpdateChecksum(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_));
-  oat_header->UpdateChecksum(&dex_file_offset_, sizeof(dex_file_offset_));
-  oat_header->UpdateChecksum(&lookup_table_offset_, sizeof(lookup_table_offset_));
-  if (lookup_table_ != nullptr) {
-    oat_header->UpdateChecksum(lookup_table_->RawData(), lookup_table_->RawDataLength());
-  }
-  oat_header->UpdateChecksum(&methods_offsets_[0],
-                            sizeof(methods_offsets_[0]) * methods_offsets_.size());
+          + (sizeof(class_offsets_[0]) * class_offsets_.size());
 }
 
 bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
                                   OutputStream* out,
                                   const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!out->WriteFully(&dex_file_location_size_, sizeof(dex_file_location_size_))) {
+  if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
-  if (!out->WriteFully(dex_file_location_data_, dex_file_location_size_)) {
+  if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
-  if (!out->WriteFully(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) {
+  if (!oat_writer->WriteData(out,
+                             &dex_file_location_checksum_,
+                             sizeof(dex_file_location_checksum_))) {
     PLOG(ERROR) << "Failed to write dex file location checksum to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
-  if (!out->WriteFully(&dex_file_offset_, sizeof(dex_file_offset_))) {
+  if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
-  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
+  if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_lookup_table_offset_ += sizeof(lookup_table_offset_);
-  if (!out->WriteFully(&methods_offsets_[0],
-                      sizeof(methods_offsets_[0]) * methods_offsets_.size())) {
+  oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+  if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_methods_offsets_ +=
-      sizeof(methods_offsets_[0]) * methods_offsets_.size();
+  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
 OatWriter::OatClass::OatClass(size_t offset,
-                              const std::vector<CompiledMethod*>& compiled_methods,
+                              const dchecked_vector<CompiledMethod*>& compiled_methods,
                               uint32_t num_non_null_compiled_methods,
                               mirror::Class::Status status)
     : compiled_methods_(compiled_methods) {
@@ -1593,7 +1684,7 @@
 
   uint32_t oat_method_offsets_offset_from_oat_class = sizeof(type_) + sizeof(status_);
   if (type_ == kOatClassSomeCompiled) {
-    method_bitmap_ = new BitVector(num_methods, false, Allocator::GetMallocAllocator());
+    method_bitmap_.reset(new BitVector(num_methods, false, Allocator::GetMallocAllocator()));
     method_bitmap_size_ = method_bitmap_->GetSizeOf();
     oat_method_offsets_offset_from_oat_class += sizeof(method_bitmap_size_);
     oat_method_offsets_offset_from_oat_class += method_bitmap_size_;
@@ -1616,10 +1707,6 @@
   }
 }
 
-OatWriter::OatClass::~OatClass() {
-  delete method_bitmap_;
-}
-
 size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader(
     size_t class_def_method_index_) const {
   uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_);
@@ -1642,51 +1729,42 @@
           + (sizeof(method_offsets_[0]) * method_offsets_.size());
 }
 
-void OatWriter::OatClass::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&status_, sizeof(status_));
-  oat_header->UpdateChecksum(&type_, sizeof(type_));
-  if (method_bitmap_size_ != 0) {
-    CHECK_EQ(kOatClassSomeCompiled, type_);
-    oat_header->UpdateChecksum(&method_bitmap_size_, sizeof(method_bitmap_size_));
-    oat_header->UpdateChecksum(method_bitmap_->GetRawStorage(), method_bitmap_size_);
-  }
-  oat_header->UpdateChecksum(&method_offsets_[0],
-                             sizeof(method_offsets_[0]) * method_offsets_.size());
-}
-
 bool OatWriter::OatClass::Write(OatWriter* oat_writer,
                                 OutputStream* out,
                                 const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!out->WriteFully(&status_, sizeof(status_))) {
+  if (!oat_writer->WriteData(out, &status_, sizeof(status_))) {
     PLOG(ERROR) << "Failed to write class status to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_status_ += sizeof(status_);
-  if (!out->WriteFully(&type_, sizeof(type_))) {
+
+  if (!oat_writer->WriteData(out, &type_, sizeof(type_))) {
     PLOG(ERROR) << "Failed to write oat class type to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_type_ += sizeof(type_);
+
   if (method_bitmap_size_ != 0) {
     CHECK_EQ(kOatClassSomeCompiled, type_);
-    if (!out->WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) {
+    if (!oat_writer->WriteData(out, &method_bitmap_size_, sizeof(method_bitmap_size_))) {
       PLOG(ERROR) << "Failed to write method bitmap size to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_);
-    if (!out->WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
+
+    if (!oat_writer->WriteData(out, method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
       PLOG(ERROR) << "Failed to write method bitmap to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_;
   }
-  if (!out->WriteFully(&method_offsets_[0],
-                      sizeof(method_offsets_[0]) * method_offsets_.size())) {
+
+  if (!oat_writer->WriteData(out, method_offsets_.data(), GetMethodOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write method offsets to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_class_method_offsets_ += sizeof(method_offsets_[0]) * method_offsets_.size();
+  oat_writer->size_oat_class_method_offsets_ += GetMethodOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 6c46ebc..5feb5fc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -21,12 +21,14 @@
 #include <cstddef>
 #include <memory>
 
+#include "base/dchecked_vector.h"
 #include "linker/relative_patcher.h"  // For linker::RelativePatcherTargetProvider.
 #include "mem_map.h"
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
 #include "safe_map.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -124,8 +126,8 @@
     return bss_size_;
   }
 
-  const std::vector<uintptr_t>& GetAbsolutePatchLocations() const {
-    return absolute_patch_locations_;
+  ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const {
+    return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
   bool WriteRodata(OutputStream* out);
@@ -133,8 +135,8 @@
 
   ~OatWriter();
 
-  const std::vector<dwarf::MethodDebugInfo>& GetMethodDebugInfo() const {
-    return method_info_;
+  ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
+    return ArrayRef<const dwarf::MethodDebugInfo>(method_info_);
   }
 
   const CompilerDriver* GetCompilerDriver() {
@@ -142,6 +144,9 @@
   }
 
  private:
+  class OatClass;
+  class OatDexFile;
+
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
   // we can share a lot of code for processing the maps with template classes below.
@@ -175,10 +180,8 @@
   size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
-  size_t InitOatCode(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  size_t InitOatCodeDexFiles(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t InitOatCode(size_t offset);
+  size_t InitOatCodeDexFiles(size_t offset);
 
   bool WriteTables(OutputStream* out, const size_t file_offset);
   bool WriteLookupTables(OutputStream* out, const size_t file_offset);
@@ -186,93 +189,11 @@
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
+  bool GetOatDataOffset(OutputStream* out);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
+  bool WriteData(OutputStream* out, const void* data, size_t size);
 
-  class OatDexFile {
-   public:
-    OatDexFile(size_t offset, const DexFile& dex_file);
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
-
-    // Offset of start of OatDexFile from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
-
-    // data to write
-    uint32_t dex_file_location_size_;
-    const uint8_t* dex_file_location_data_;
-    uint32_t dex_file_location_checksum_;
-    uint32_t dex_file_offset_;
-    uint32_t lookup_table_offset_;
-    TypeLookupTable* lookup_table_;  // Owned by the dex file.
-    std::vector<uint32_t> methods_offsets_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatDexFile);
-  };
-
-  class OatClass {
-   public:
-    OatClass(size_t offset,
-             const std::vector<CompiledMethod*>& compiled_methods,
-             uint32_t num_non_null_compiled_methods,
-             mirror::Class::Status status);
-    ~OatClass();
-    size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
-    size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
-
-    CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
-      DCHECK_LT(class_def_method_index, compiled_methods_.size());
-      return compiled_methods_[class_def_method_index];
-    }
-
-    // Offset of start of OatClass from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
-
-    // CompiledMethods for each class_def_method_index, or null if no method is available.
-    std::vector<CompiledMethod*> compiled_methods_;
-
-    // Offset from OatClass::offset_ to the OatMethodOffsets for the
-    // class_def_method_index. If 0, it means the corresponding
-    // CompiledMethod entry in OatClass::compiled_methods_ should be
-    // null and that the OatClass::type_ should be kOatClassBitmap.
-    std::vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
-
-    // data to write
-
-    static_assert(mirror::Class::Status::kStatusMax < (2 ^ 16), "class status won't fit in 16bits");
-    int16_t status_;
-
-    static_assert(OatClassType::kOatClassMax < (2 ^ 16), "oat_class type won't fit in 16bits");
-    uint16_t type_;
-
-    uint32_t method_bitmap_size_;
-
-    // bit vector indexed by ClassDef method index. When
-    // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
-    // method has an OatMethodOffsets in methods_offsets_, otherwise
-    // the entry was ommited to save space. If OatClassType::type_ is
-    // not is kOatClassBitmap, the bitmap will be null.
-    BitVector* method_bitmap_;
-
-    // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
-    // present in the OatClass. Note that some may be missing if
-    // OatClass::compiled_methods_ contains null values (and
-    // oat_method_offsets_offsets_from_oat_class_ should contain 0
-    // values in this case).
-    std::vector<OatMethodOffsets> method_offsets_;
-    std::vector<OatQuickMethodHeader> method_headers_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatClass);
-  };
-
-  std::vector<dwarf::MethodDebugInfo> method_info_;
+  dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
@@ -301,9 +222,9 @@
 
   // data to write
   SafeMap<std::string, std::string>* key_value_store_;
-  OatHeader* oat_header_;
-  std::vector<OatDexFile*> oat_dex_files_;
-  std::vector<OatClass*> oat_classes_;
+  std::unique_ptr<OatHeader> oat_header_;
+  dchecked_vector<OatDexFile> oat_dex_files_;
+  dchecked_vector<OatClass> oat_classes_;
   std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
   std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
   std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
@@ -336,14 +257,14 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
-  uint32_t size_oat_dex_file_methods_offsets_;
+  uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_dex_file_class_offsets_;
+  uint32_t size_oat_lookup_table_alignment_;
+  uint32_t size_oat_lookup_table_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
   uint32_t size_oat_class_method_offsets_;
-  uint32_t size_oat_lookup_table_alignment_;
-  uint32_t size_oat_lookup_table_offset_;
-  uint32_t size_oat_lookup_table_;
 
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 4c3f66a..dc75ff1 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1590,15 +1590,18 @@
     HGraph* graph = GetGraph();
     HInstruction* zero;
     switch (type) {
-      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
-      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
-      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      case Primitive::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
       default: zero = graph->GetConstant(type, 0); break;
     }
     HPhi* phi = new (graph->GetArena())
         HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
     phi->SetRawInputAt(0, instruction);
     phi->SetRawInputAt(1, zero);
+    if (type == Primitive::kPrimNot) {
+      phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+    }
     new_preheader->AddPhi(phi);
     return phi;
   }
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e1404ce..1178d0f 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -2841,15 +2841,21 @@
     }
 
     case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a26786..9fda838 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3234,6 +3234,147 @@
   }
 }
 
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a +ve. equivalent in that range.
+    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, rhs.AsRegister<Register>());
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to +ve. equivalents on the interval [0,63].
+    rot &= kMaxLongShiftValue;
+    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+    // logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label shift_by_32_plus_shift_right;
+
+    __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+    __ b(&shift_by_32_plus_shift_right, CC);
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+void LocationsBuilderARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(locations);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(locations);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
 
@@ -5067,16 +5208,15 @@
 }
 
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
@@ -5107,8 +5247,12 @@
     __ LoadFromOffset(kLoadWord, out, out, cache_offset);
   }
 
-  __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 static int32_t GetExceptionTlsOffset() {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 193add2..8193c28 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,9 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +216,9 @@
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 227f4be..5205830 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1791,6 +1791,17 @@
         __ Orr(dst, lhs, rhs);
       } else if (instr->IsSub()) {
         __ Sub(dst, lhs, rhs);
+      } else if (instr->IsRor()) {
+        if (rhs.IsImmediate()) {
+          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+          __ Ror(dst, lhs, shift);
+        } else {
+          // Ensure shift distance is in the same size register as the result. If
+          // we are rotating a long and the shift comes in a w register originally,
+          // we don't need to sxtw for use as an x since the shift distances are
+          // all & reg_bits - 1.
+          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+        }
       } else {
         DCHECK(instr->IsXor());
         __ Eor(dst, lhs, rhs);
@@ -3850,16 +3861,15 @@
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
-  codegen_->AddSlowPath(slow_path);
-
   Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
@@ -3889,8 +3899,12 @@
     __ Ldr(out, MemOperand(out.X(), cache_offset));
   }
 
-  __ Cbz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  if (!load->IsInDexCache()) {
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderARM64::VisitLocal(HLocal* local) {
@@ -4229,6 +4243,7 @@
 
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -4258,6 +4273,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -4295,6 +4318,7 @@
 
     default:
       LOG(FATAL) << "Unimplemented local type " << field_type;
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d092de9..ce7cbcd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3413,24 +3413,28 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Register out = locations->Out().AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  __ Beqz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
@@ -3913,6 +3917,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78f5644..1a9de15 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3105,16 +3105,15 @@
 }
 
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
@@ -3123,8 +3122,13 @@
   __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
   __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
   // TODO: We will need a read barrier here.
-  __ Beqzc(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqzc(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
@@ -3519,6 +3523,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 19f03df..469dd49 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -433,6 +433,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
  public:
@@ -454,7 +504,7 @@
     // to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -470,7 +520,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -612,14 +664,18 @@
 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1831,7 +1887,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -3759,6 +3815,92 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimLong:
+      // Add the temporary needed.
+      locations->AddTemp(Location::RequiresRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  if (ror->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    if (second.IsRegister()) {
+      Register second_reg = second.AsRegister<Register>();
+      __ rorl(first_reg, second_reg);
+    } else {
+      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+      __ rorl(first_reg, imm);
+    }
+    return;
+  }
+
+  DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+  Register first_reg_lo = first.AsRegisterPairLow<Register>();
+  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  if (second.IsRegister()) {
+    Register second_reg = second.AsRegister<Register>();
+    DCHECK_EQ(second_reg, ECX);
+    __ movl(temp_reg, first_reg_hi);
+    __ shrd(first_reg_hi, first_reg_lo, second_reg);
+    __ shrd(first_reg_lo, temp_reg, second_reg);
+    __ movl(temp_reg, first_reg_hi);
+    __ testl(second_reg, Immediate(32));
+    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+  } else {
+    int32_t shift_amt =
+      CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue;
+    if (shift_amt == 0) {
+      // Already fine.
+      return;
+    }
+    if (shift_amt == 32) {
+      // Just swap.
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+      return;
+    }
+
+    Immediate imm(shift_amt);
+    // Save the constents of the low value.
+    __ movl(temp_reg, first_reg_lo);
+
+    // Shift right into low, feeding bits from high.
+    __ shrd(first_reg_lo, first_reg_hi, imm);
+
+    // Shift right into high, feeding bits from the original low.
+    __ shrd(first_reg_hi, temp_reg, imm);
+
+    // Swap if needed.
+    if (shift_amt > 32) {
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+    }
+  }
+}
+
 void LocationsBuilderX86::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -4006,7 +4148,7 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -4260,9 +4402,14 @@
 
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
     // Long values can be loaded atomically into an XMM using movsd.
-    // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
-    // and then copy the XMM into the output 32bits at a time).
+    // So we use an XMM register as a temp to achieve atomicity (first
+    // load the temp into the XMM and then copy the XMM into the
+    // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -4300,9 +4447,32 @@
     }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ movl(out.AsRegister<Register>(), Address(base, offset));
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<Register>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
     case Primitive::kPrimLong: {
@@ -4337,17 +4507,20 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4412,7 +4585,7 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4517,7 +4690,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4698,6 +4871,11 @@
             Location::kOutputOverlap :
             Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -4705,12 +4883,13 @@
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4722,7 +4901,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4734,7 +4913,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4746,7 +4925,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4756,13 +4935,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4772,20 +4947,56 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+        __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+        __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
-        __ movl(out.AsRegisterPairLow<Register>(),
+        __ movl(out_loc.AsRegisterPairLow<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(),
+        __ movl(out_loc.AsRegisterPairHigh<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
@@ -4793,7 +5004,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4805,7 +5016,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4820,23 +5031,12 @@
       UNREACHABLE();
   }
 
-  if (type != Primitive::kPrimLong) {
+  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long arrays, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
-  }
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -4968,12 +5168,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -5254,8 +5454,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5536,32 +5736,16 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leal(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leal(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5611,49 +5795,36 @@
 }
 
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leal(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leal(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+    codegen_->AddSlowPath(slow_path);
+    __ testl(out, out);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
   }
-
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
@@ -5693,6 +5864,14 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5718,21 +5897,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5748,10 +5928,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5772,17 +5951,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5811,17 +5981,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5845,17 +6006,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5899,6 +6051,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                                     /* is_fatal */ false);
@@ -5951,27 +6110,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5991,8 +6150,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6014,18 +6172,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -6038,8 +6186,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6065,18 +6212,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -6088,8 +6225,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6106,19 +6242,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -6132,8 +6257,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6141,8 +6265,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6159,6 +6282,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -6320,14 +6450,226 @@
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
-                                           Location out,
-                                           Location ref,
-                                           Location obj,
-                                           uint32_t offset,
-                                           Location index) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leal(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 const Address& src,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -6341,57 +6683,41 @@
       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                Location out,
-                                                Location ref,
-                                                Location obj,
-                                                uint32_t offset,
-                                                Location index) {
+void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<Register>());
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                  Location out,
-                                                  Location root) {
+void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6750,7 +7076,7 @@
 // TODO: target as memory.
 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
   if (!target.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f9403a6..7121799 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -219,11 +219,44 @@
   void GenerateShlLong(const Location& loc, int shift);
   void GenerateShrLong(const Location& loc, int shift);
   void GenerateUShrLong(const Location& loc, int shift);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -364,6 +397,8 @@
                   Register value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -405,7 +440,26 @@
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -422,23 +476,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -448,9 +504,18 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 44a51ea..2c5fbc7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -456,6 +456,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
  public:
@@ -477,7 +527,7 @@
     // reference load to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -493,7 +543,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -634,13 +686,17 @@
 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -731,7 +787,7 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ gs()->movl(temp.AsRegister<CpuRegister>(),
-                    Address::Absolute(invoke->GetStringInitOffset(), true));
+                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
@@ -748,7 +804,7 @@
       pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
                                                   invoke->GetDexCacheArrayOffset());
       __ movq(temp.AsRegister<CpuRegister>(),
-              Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
       // Bind the label at the end of the "movl" insn.
       __ Bind(&pc_relative_dex_cache_patches_.back().label);
       break;
@@ -907,7 +963,7 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(instruction, slow_path);
-  __ gs()->call(Address::Absolute(entry_point_offset, true));
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1939,7 +1995,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -2667,7 +2723,8 @@
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(
+                        in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
 
@@ -2911,7 +2968,8 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2925,7 +2983,8 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -3000,7 +3059,8 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ subss(first.AsFpuRegister<XmmRegister>(),
@@ -3014,7 +3074,8 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -3121,7 +3182,8 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -3136,7 +3198,8 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -3542,7 +3605,8 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3556,7 +3620,8 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3755,6 +3820,56 @@
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorl(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        __ rorl(first_reg, imm);
+      }
+      break;
+    case Primitive::kPrimLong:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorq(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+        __ rorq(first_reg, imm);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
   }
 }
 
@@ -3910,10 +4025,10 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
   switch (kind) {
@@ -3953,6 +4068,11 @@
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -3988,12 +4108,36 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
@@ -4014,14 +4158,20 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  if (field_type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // fields, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+  if (is_volatile) {
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4075,7 +4225,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4181,7 +4331,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4358,6 +4508,11 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4365,12 +4520,13 @@
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  Location out_loc = locations->Out();
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4382,7 +4538,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4394,7 +4550,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4406,7 +4562,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4416,13 +4572,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4432,9 +4584,46 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        CpuRegister out = out_loc.AsRegister<CpuRegister>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4446,7 +4635,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4458,7 +4647,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4472,20 +4661,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -4609,12 +4790,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -4840,8 +5021,8 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ gs()->movq(card, Address::Absolute(
-      Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+                                        /* no_rip */ true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0), card);
@@ -4900,8 +5081,9 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+                                  /* no_rip */ true),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5125,7 +5307,7 @@
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
-  // No need for memory fence, thanks to the X86_64 memory model.
+  // No need for memory fence, thanks to the x86-64 memory model.
 }
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5156,32 +5338,16 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leaq(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movq(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leaq(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5220,53 +5386,41 @@
 }
 
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leaq(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leaq(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ testl(out, out);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
   }
-
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+                           /* no_rip */ true);
 }
 
 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5302,6 +5456,14 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5327,21 +5489,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5357,10 +5520,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5386,17 +5548,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5425,17 +5578,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5459,17 +5603,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5513,6 +5648,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                                        /* is_fatal */ false);
@@ -5565,27 +5707,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5597,7 +5739,7 @@
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done;
+  Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
@@ -5605,8 +5747,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5628,18 +5769,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5652,8 +5783,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5679,18 +5809,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -5702,8 +5822,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5720,19 +5839,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5746,8 +5854,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5755,8 +5862,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5773,6 +5879,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -5916,14 +6029,227 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
-                                              Location out,
-                                              Location ref,
-                                              Location obj,
-                                              uint32_t offset,
-                                              Location index) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                      Location out,
+                                                                      uint32_t offset,
+                                                                      Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                       Location out,
+                                                                       Location obj,
+                                                                       uint32_t offset,
+                                                                       Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                             Location root,
+                                                             CpuRegister obj,
+                                                             uint32_t offset) {
+  CpuRegister root_reg = root.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+                                      /* no_rip */ true),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leaq(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    CpuRegister obj,
+                                                                    const Address& src,
+                                                                    Location temp,
+                                                                    bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86-64 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -5937,57 +6263,41 @@
       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                   Location out,
-                                                   Location ref,
-                                                   Location obj,
-                                                   uint32_t offset,
-                                                   Location index) {
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                     Location out,
-                                                     Location root) {
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6236,7 +6546,7 @@
 // TODO: trg as memory.
 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 145b1f3..dda9ea2 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -47,6 +47,12 @@
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
+// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
+// If the ART ABI changes, this list must be updated.  It is used to ensure that
+// these are not clobbered by any direct call to native code (such as math intrinsics).
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
+
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -207,11 +213,44 @@
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               CpuRegister obj,
+                               uint32_t offset);
+
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -318,6 +357,8 @@
                   CpuRegister value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
 
@@ -350,7 +391,26 @@
     return isa_features_;
   }
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -367,23 +427,25 @@
   // When `index` provided (i.e., when it is different from
   // Location::NoLocation()), the offset value passed to
   // artReadBarrierSlow is adjusted to take `index` into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -393,7 +455,7 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
   int ConstantAreaStart() const {
     return constant_area_start_;
@@ -418,6 +480,15 @@
                           HInstruction* instruction);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index af8b8b5..10d8343 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -202,6 +202,11 @@
     return true;
   }
 
+  // Our code generator ensures shift distances are within an encodable range.
+  if (instr->IsRor()) {
+    return true;
+  }
+
   int64_t value = CodeGenerator::GetInt64ValueOf(constant);
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index e469c8d..a8f65bf 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -32,7 +32,7 @@
 /**
  * Fixture class for the constant folding and dce tests.
  */
-class ConstantFoldingTest : public testing::Test {
+class ConstantFoldingTest : public CommonCompilerTest {
  public:
   ConstantFoldingTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -56,7 +56,7 @@
                             const std::string& expected_after_dce,
                             std::function<void(HGraph*)> check_after_cf) {
     ASSERT_NE(graph_, nullptr);
-    graph_->TryBuildingSsa();
+    TransformToSsa(graph_);
 
     StringPrettyPrinter printer_before(graph_);
     printer_before.VisitInsertionOrder();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 2c6a1ef..f0f98ef 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -26,6 +26,8 @@
 
 namespace art {
 
+class DeadCodeEliminationTest : public CommonCompilerTest {};
+
 static void TestCode(const uint16_t* data,
                      const std::string& expected_before,
                      const std::string& expected_after) {
@@ -34,7 +36,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   StringPrettyPrinter printer_before(graph);
   printer_before.VisitInsertionOrder();
@@ -55,7 +57,6 @@
   ASSERT_EQ(actual_after, expected_after);
 }
 
-
 /**
  * Small three-register program.
  *
@@ -69,7 +70,7 @@
  * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
  *     return-void              7.      return
  */
-TEST(DeadCodeElimination, AdditionAndConditionalJump) {
+TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -131,7 +132,7 @@
  * L3: v2 <- v1 + 4             11.     add-int/lit16 v2, v1, #+4
  *     return                   13.     return-void
  */
-TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) {
+TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c16b872..f3c1dbe 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -24,6 +24,7 @@
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -594,6 +595,17 @@
       }
     }
   }
+
+  // Ensure that reference type instructions have reference type info.
+  if (instruction->GetType() == Primitive::kPrimNot) {
+    ScopedObjectAccess soa(Thread::Current());
+    if (!instruction->GetReferenceTypeInfo().IsValid()) {
+      AddError(StringPrintf("Reference type instruction %s:%d does not have "
+                            "valid reference type information.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
@@ -850,7 +862,7 @@
 
 void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
   VisitInstruction(op);
-  if (op->IsUShr() || op->IsShr() || op->IsShl()) {
+  if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
     if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
       AddError(StringPrintf(
           "Shift operation %s %d has a non-int kind second input: "
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index fee56c7..d10df4c 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -17,8 +17,6 @@
 #include "graph_checker.h"
 #include "optimizing_unit_test.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
 /**
@@ -43,7 +41,6 @@
   return graph;
 }
 
-
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -61,8 +58,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  graph->BuildDominatorTree();
-  graph->TransformToSsa();
+  TransformToSsa(graph);
 
   SSAChecker ssa_checker(graph);
   ssa_checker.Run();
@@ -145,7 +141,9 @@
   ASSERT_FALSE(graph_checker.IsValid());
 }
 
-TEST(SSAChecker, SSAPhi) {
+class SSACheckerTest : public CommonCompilerTest {};
+
+TEST_F(SSACheckerTest, SSAPhi) {
   // This code creates one Phi function during the conversion to SSA form.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e9fdb84..5f1328f 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,6 +30,7 @@
 #include "optimization.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -505,7 +506,7 @@
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
       }
-    } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
+    } else if ((IsPass(SsaBuilder::kSsaBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
@@ -519,21 +520,15 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
-      } else if (instruction->IsNullConstant()) {
+      } else {
         // The NullConstant may be added to the graph during other passes that happen between
         // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
         // doesn't run or doesn't inline anything, the NullConstant remains untyped.
         // So we should check NullConstants for validity only after reference type propagation.
-        //
-        // Note: The infrastructure to properly type NullConstants everywhere is to complex to add
-        // for the benefits.
-        StartAttributeStream("klass") << "not_set";
-        DCHECK(!is_after_pass_
-            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
-            << " Expected a valid rti after reference type propagation";
-      } else {
-        DCHECK(!is_after_pass_)
-            << "Expected a valid rti after reference type propagation";
+        DCHECK(graph_in_bad_state_ ||
+               (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName)))
+            << instruction->DebugName() << instruction->GetId() << " has invalid rti "
+            << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
       }
     }
     if (disasm_info_ != nullptr) {
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index de60cf2..9929696 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -21,11 +21,11 @@
 #include "optimizing_unit_test.h"
 #include "side_effects_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
-TEST(GVNTest, LocalFieldElimination) {
+class GVNTest : public CommonCompilerTest {};
+
+TEST_F(GVNTest, LocalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   NullHandle<mirror::DexCache> dex_cache;
@@ -100,7 +100,7 @@
   ASSERT_EQ(different_offset->GetBlock(), block);
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -110,7 +110,7 @@
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 }
 
-TEST(GVNTest, GlobalFieldElimination) {
+TEST_F(GVNTest, GlobalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   NullHandle<mirror::DexCache> dex_cache;
@@ -182,7 +182,7 @@
                                                           0));
   join->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -193,7 +193,7 @@
   ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
 }
 
-TEST(GVNTest, LoopFieldElimination) {
+TEST_F(GVNTest, LoopFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   NullHandle<mirror::DexCache> dex_cache;
@@ -288,7 +288,7 @@
   ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
   ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   {
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
@@ -316,7 +316,7 @@
 }
 
 // Test that inner loops affect the side effects of the outer loop.
-TEST(GVNTest, LoopSideEffects) {
+TEST_F(GVNTest, LoopSideEffects) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   NullHandle<mirror::DexCache> dex_cache;
@@ -364,7 +364,7 @@
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
   outer_loop_exit->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
       *outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 5de94f4..776c115 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -18,7 +18,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -28,7 +27,7 @@
 /**
  * Fixture class for the InductionVarAnalysis tests.
  */
-class InductionVarAnalysisTest : public testing::Test {
+class InductionVarAnalysisTest : public CommonCompilerTest {
  public:
   InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -102,6 +101,7 @@
       basic_[d] = new (&allocator_) HLocal(d);
       entry_->AddInstruction(basic_[d]);
       loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_));
+      loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto());
       HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
       loop_header_[d]->AddInstruction(load);
       HInstruction* compare = new (&allocator_) HLessThan(load, constant100_);
@@ -168,7 +168,7 @@
 
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
     iva_->Run();
   }
@@ -212,7 +212,7 @@
   //   ..
   // }
   BuildLoopNest(10);
-  ASSERT_TRUE(graph_->TryBuildingSsa());
+  TransformToSsa(graph_);
   ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
   for (int d = 0; d < 1; d++) {
     ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 128b5bb..a1c797a 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "induction_var_range.h"
 #include "nodes.h"
@@ -29,7 +28,7 @@
 /**
  * Fixture class for the InductionVarRange tests.
  */
-class InductionVarRangeTest : public testing::Test {
+class InductionVarRangeTest : public CommonCompilerTest {
  public:
   InductionVarRangeTest()  : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -105,7 +104,7 @@
 
   /** Performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_->Run();
   }
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a4dcb3a..db11709 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -33,6 +33,7 @@
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
 #include "sharpening.h"
+#include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -514,7 +515,7 @@
     return false;
   }
 
-  if (!callee_graph->TryBuildingSsa()) {
+  if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     return false;
@@ -549,14 +550,12 @@
   // Run simple optimizations on the graph.
   HDeadCodeElimination dce(callee_graph, stats_);
   HConstantFolding fold(callee_graph);
-  ReferenceTypePropagation type_propagation(callee_graph, handles_);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
-    &type_propagation,
     &sharpening,
     &simplify,
     &fold,
@@ -677,42 +676,36 @@
     DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
   }
 
-  // When merging the graph we might create a new NullConstant in the caller graph which does
-  // not have the chance to be typed. We assign the correct type here so that we can keep the
-  // assertion that every reference has a valid type. This also simplifies checks along the way.
-  HNullConstant* null_constant = graph_->GetNullConstant();
-  if (!null_constant->GetReferenceTypeInfo().IsValid()) {
-    ReferenceTypeInfo::TypeHandle obj_handle =
-        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
-    null_constant->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
-  }
-
   // Check the integrity of reference types and run another type propagation if needed.
-  if ((return_replacement != nullptr)
-      && (return_replacement->GetType() == Primitive::kPrimNot)) {
-    if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
-      // Make sure that we have a valid type for the return. We may get an invalid one when
-      // we inline invokes with multiple branches and create a Phi for the result.
-      // TODO: we could be more precise by merging the phi inputs but that requires
-      // some functionality from the reference type propagation.
-      DCHECK(return_replacement->IsPhi());
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-      ReferenceTypeInfo::TypeHandle return_handle =
-          handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
-      return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-         return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
-    }
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
+      if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
+        // Make sure that we have a valid type for the return. We may get an invalid one when
+        // we inline invokes with multiple branches and create a Phi for the result.
+        // TODO: we could be more precise by merging the phi inputs but that requires
+        // some functionality from the reference type propagation.
+        DCHECK(return_replacement->IsPhi());
+        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        ReferenceTypeInfo::TypeHandle return_handle =
+            handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
+        return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
+            return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
+      }
 
-    if (do_rtp) {
-      // If the return type is a refinement of the declared type run the type propagation again.
-      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-      if (invoke_rti.IsStrictSupertypeOf(return_rti)
-          || (return_rti.IsExact() && !invoke_rti.IsExact())
-          || !return_replacement->CanBeNull()) {
-        ReferenceTypePropagation rtp_fixup(graph_, handles_);
-        rtp_fixup.Run();
+      if (do_rtp) {
+        // If the return type is a refinement of the declared type run the type propagation again.
+        ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+        ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+        if (invoke_rti.IsStrictSupertypeOf(return_rti)
+            || (return_rti.IsExact() && !invoke_rti.IsExact())
+            || !return_replacement->CanBeNull()) {
+          ReferenceTypePropagation(graph_, handles_).Run();
+        }
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      if (do_rtp) {
+        // Inlining InstanceOf into an If may put a tighter bound on reference types.
+        ReferenceTypePropagation(graph_, handles_).Run();
       }
     }
   }
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2f3df7f..e1b13c5 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -39,6 +39,12 @@
     }
   }
 
+  bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotate(HBinaryOperation* instruction);
+  bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
   void VisitShift(HBinaryOperation* shift);
 
@@ -77,6 +83,7 @@
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifyRotate(HInvoke* invoke, bool is_left);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
 
@@ -173,6 +180,161 @@
   }
 }
 
+static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) {
+  return (sub->GetRight() == other &&
+          sub->GetLeft()->IsConstant() &&
+          (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0);
+}
+
+bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
+                                                        HUShr* ushr,
+                                                        HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(),
+                                                ushr->GetLeft(),
+                                                ushr->GetRight());
+  op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror);
+  if (!ushr->HasUses()) {
+    ushr->GetBlock()->RemoveInstruction(ushr);
+  }
+  if (!ushr->GetRight()->HasUses()) {
+    ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight());
+  }
+  if (!shl->HasUses()) {
+    shl->GetBlock()->RemoveInstruction(shl);
+  }
+  if (!shl->GetRight()->HasUses()) {
+    shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
+  }
+  return true;
+}
+
+// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
+bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return false;
+  }
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+  // If we have an UShr and a Shl (in either order).
+  if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) {
+    HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr();
+    HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl();
+    DCHECK(Primitive::IsIntOrLongType(ushr->GetType()));
+    if (ushr->GetType() == shl->GetType() &&
+        ushr->GetLeft() == shl->GetLeft()) {
+      if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) {
+        // Shift distances are both constant, try replacing with Ror if they
+        // add up to the register size.
+        return TryReplaceWithRotateConstantPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) {
+        // Shift distances are potentially of the form x and (reg_size - x).
+        return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) {
+        // Shift distances are potentially of the form d and -d.
+        return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl);
+      }
+    }
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> #rdist OP x << #ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #ldist
+//    OP   dst, dst, tmp
+// or like (x >>> #rdist OP x << #-ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #-ldist
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   #rdist
+bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op,
+                                                                       HUShr* ushr,
+                                                                       HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant());
+  size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant());
+  if (((ldist + rdist) & (reg_bits - 1)) == 0) {
+    ReplaceRotateWithRor(op, ushr, shl);
+    return true;
+  }
+  return false;
+}
+
+// Replace code looking like (x >>> -d OP x << d):
+//    Neg  neg, d
+//    UShr dst, x,   neg
+//    Shl  tmp, x,   d
+//    OP   dst, dst, tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,   neg
+// *** OR ***
+// Replace code looking like (x >>> d OP x << -d):
+//    UShr dst, x,   d
+//    Neg  neg, d
+//    Shl  tmp, x,   neg
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   d
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg());
+  bool neg_is_left = shl->GetRight()->IsNeg();
+  HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg();
+  // And the shift distance being negated is the distance being shifted the other way.
+  if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) {
+    ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> d OP x << (#bits - d)):
+//    UShr dst, x,     d
+//    Sub  ld,  #bits, d
+//    Shl  tmp, x,     ld
+//    OP   dst, dst,   tmp
+// with
+//    Ror  dst, x,     d
+// *** OR ***
+// Replace code looking like (x >>> (#bits - d) OP x << d):
+//    Sub  rd,  #bits, d
+//    UShr dst, x,     rd
+//    Shl  tmp, x,     d
+//    OP   dst, dst,   tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,     neg
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  HInstruction* shl_shift = shl->GetRight();
+  HInstruction* ushr_shift = ushr->GetRight();
+  if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) ||
+      (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
   HInstruction* obj = null_check->InputAt(0);
   if (!obj->CanBeNull()) {
@@ -530,7 +692,10 @@
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
     RecordSimplification();
     neg->GetBlock()->RemoveInstruction(neg);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -906,7 +1071,10 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1027,6 +1195,8 @@
     RecordSimplification();
     return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
@@ -1095,6 +1265,42 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return;
+  }
+  HInstruction* value = invoke->InputAt(0);
+  HInstruction* distance = invoke->InputAt(1);
+  // Replace the invoke with an HRor.
+  if (is_left) {
+    distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance);
+    invoke->GetBlock()->InsertInstructionBefore(distance, invoke);
+  }
+  HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
+  // Remove ClinitCheck and LoadClass, if possible.
+  HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+  if (clinit->IsClinitCheck() && !clinit->HasUses()) {
+    clinit->GetBlock()->RemoveInstruction(clinit);
+    HInstruction* ldclass = clinit->InputAt(0);
+    if (ldclass->IsLoadClass() && !ldclass->HasUses()) {
+      ldclass->GetBlock()->RemoveInstruction(ldclass);
+    }
+  }
+}
+
 static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
   if (potential_length->IsArrayLength()) {
     return potential_length->InputAt(0) == potential_array;
@@ -1165,6 +1371,12 @@
     SimplifyStringEquals(instruction);
   } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
     SimplifySystemArrayCopy(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) {
+    SimplifyRotate(instruction, false);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
+    SimplifyRotate(instruction, true);
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6a34b13..6bbc751 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -49,6 +49,7 @@
       GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
   HArm64IntermediateAddress* address =
       new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
+  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
   access->GetBlock()->InsertInstructionBefore(address, access);
   access->ReplaceInput(address, 0);
   // Both instructions must depend on GC to prevent any instruction that can
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 8340811..7127215 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -189,6 +189,42 @@
       return ((method.d.data & kIntrinsicFlagMin) == 0) ?
           Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
 
+    // More math builtins.
+    case kIntrinsicCos:
+      return Intrinsics::kMathCos;
+    case kIntrinsicSin:
+      return Intrinsics::kMathSin;
+    case kIntrinsicAcos:
+      return Intrinsics::kMathAcos;
+    case kIntrinsicAsin:
+      return Intrinsics::kMathAsin;
+    case kIntrinsicAtan:
+      return Intrinsics::kMathAtan;
+    case kIntrinsicAtan2:
+      return Intrinsics::kMathAtan2;
+    case kIntrinsicCbrt:
+      return Intrinsics::kMathCbrt;
+    case kIntrinsicCosh:
+      return Intrinsics::kMathCosh;
+    case kIntrinsicExp:
+      return Intrinsics::kMathExp;
+    case kIntrinsicExpm1:
+      return Intrinsics::kMathExpm1;
+    case kIntrinsicHypot:
+      return Intrinsics::kMathHypot;
+    case kIntrinsicLog:
+      return Intrinsics::kMathLog;
+    case kIntrinsicLog10:
+      return Intrinsics::kMathLog10;
+    case kIntrinsicNextAfter:
+      return Intrinsics::kMathNextAfter;
+    case kIntrinsicSinh:
+      return Intrinsics::kMathSinh;
+    case kIntrinsicTan:
+      return Intrinsics::kMathTan;
+    case kIntrinsicTanh:
+      return Intrinsics::kMathTanh;
+
     // Misc math.
     case kIntrinsicSqrt:
       return Intrinsics::kMathSqrt;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 5329b5c..e8181bb 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -240,178 +240,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
-static void GenIntegerRotate(LocationSummary* locations,
-                             ArmAssembler* assembler,
-                             bool is_left) {
-  Register in = locations->InAt(0).AsRegister<Register>();
-  Location rhs = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
-
-  if (rhs.IsConstant()) {
-    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
-    // so map all rotations to a +ve. equivalent in that range.
-    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
-    if (rot) {
-      // Rotate, mapping left rotations to right equivalents if necessary.
-      // (e.g. left by 2 bits == right by 30.)
-      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
-    } else if (out != in) {
-      __ Mov(out, in);
-    }
-  } else {
-    if (is_left) {
-      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
-      __ Ror(out, in, out);
-    } else {
-      __ Ror(out, in, rhs.AsRegister<Register>());
-    }
-  }
-}
-
-// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
-// rotates by swapping input regs (effectively rotating by the first 32-bits of
-// a larger rotation) or flipping direction (thus treating larger right/left
-// rotations as sub-word sized rotations in the other direction) as appropriate.
-static void GenLongRotate(LocationSummary* locations,
-                          ArmAssembler* assembler,
-                          bool is_left) {
-  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
-  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
-  Location rhs = locations->InAt(1);
-  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
-  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
-
-  if (rhs.IsConstant()) {
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
-    // Map all left rotations to right equivalents.
-    if (is_left) {
-      rot = 0x40 - rot;
-    }
-    // Map all rotations to +ve. equivalents on the interval [0,63].
-    rot &= 0x3F;
-    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
-    // logic below to a simple pair of binary orr.
-    // (e.g. 34 bits == in_reg swap + 2 bits right.)
-    if (rot >= 0x20) {
-      rot -= 0x20;
-      std::swap(in_reg_hi, in_reg_lo);
-    }
-    // Rotate, or mov to out for zero or word size rotations.
-    if (rot) {
-      __ Lsr(out_reg_hi, in_reg_hi, rot);
-      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
-      __ Lsr(out_reg_lo, in_reg_lo, rot);
-      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
-    } else {
-      __ Mov(out_reg_lo, in_reg_lo);
-      __ Mov(out_reg_hi, in_reg_hi);
-    }
-  } else {
-    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
-    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
-    Label end;
-    Label right;
-
-    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
-    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
-    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
-
-    if (is_left) {
-      __ b(&right, CS);
-    } else {
-      __ b(&right, CC);
-      std::swap(shift_left, shift_right);
-    }
-
-    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
-    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
-    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ Lsr(shift_left, in_reg_hi, shift_right);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
-    __ b(&end);
-
-    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
-    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
-    __ Bind(&right);
-    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ Lsl(shift_right, in_reg_hi, shift_left);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
-
-    __ Bind(&end);
-  }
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
@@ -1700,8 +1528,12 @@
 
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
@@ -1718,6 +1550,23 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 962c4d5..6b34daa 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -330,103 +330,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenRotateRight(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           rhs.reg());
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
-static void GenRotateLeft(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t regsize = RegisterFrom(in, type).SizeInBits();
-    uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
-    __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Neg(RegisterFrom(out, type),
-           Operand(RegisterFrom(locations->InAt(1), type)));
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           RegisterFrom(out, type));
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
                        vixl::MacroAssembler* masm) {
@@ -1527,11 +1430,33 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 8f1d5e1..96f43a0 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -51,6 +51,23 @@
   V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
   V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \
   V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCos, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathSin, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathExp, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathLog, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathTan, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \
   V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \
   V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \
   V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 9ecce0e..06fab61 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -989,6 +989,23 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 36e1b20..8aa7d9f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1730,6 +1730,24 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 5b67cde..fd454d8 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -788,6 +788,195 @@
   __ Bind(&done);
 }
 
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86Assembler* assembler = codegen->GetAssembler();
+
+  // We need some place to pass the parameters.
+  __ subl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(16);
+
+  // Pass the parameters at the bottom of the stack.
+  __ movsd(Address(ESP, 0), XMM0);
+
+  // If we have a second parameter, pass it next.
+  if (invoke->GetNumberOfArguments() == 2) {
+    __ movsd(Address(ESP, 8), XMM1);
+  }
+
+  // Now do the actual call.
+  __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
+
+  // Extract the return value from the FP stack.
+  __ fstpl(Address(ESP, 0));
+  __ movsd(XMM0, Address(ESP, 0));
+
+  // And clean up the stack.
+  __ addl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(-16);
+
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
 void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1601,12 +1790,27 @@
   Location output_loc = locations->Out();
 
   switch (type) {
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       Register output = output_loc.AsRegister<Register>();
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      Register output = output_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
     }
@@ -1634,8 +1838,10 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
-                                          bool is_long, bool is_volatile) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type,
+                                          bool is_volatile) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1647,7 +1853,7 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  if (is_long) {
+  if (type == Primitive::kPrimLong) {
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
@@ -1658,25 +1864,30 @@
   } else {
     locations->SetOut(Location::RequiresRegister());
   }
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
 }
 
 
@@ -2277,56 +2488,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  Register first_reg = locations->InAt(0).AsRegister<Register>();
-  Location second = locations->InAt(1);
-
-  if (second.IsRegister()) {
-    Register second_reg = second.AsRegister<Register>();
-    if (is_left) {
-      __ roll(first_reg, second_reg);
-    } else {
-      __ rorl(first_reg, second_reg);
-    }
-  } else {
-    Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-    if (is_left) {
-      __ roll(first_reg, imm);
-    } else {
-      __ rorl(first_reg, imm);
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2337,6 +2498,8 @@
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ecd129f..ce737e3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -703,6 +703,188 @@
   __ Bind(&done);
 }
 
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // We have to ensure that the native code doesn't clobber the XMM registers which are
+  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
+  // saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86_64Assembler* assembler = codegen->GetAssembler();
+
+  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // We have to ensure that the native code doesn't clobber the XMM registers which are
+  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
+  // saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1735,16 +1917,30 @@
   Location offset_loc = locations->InAt(2);
   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
   Location output_loc = locations->Out();
-  CpuRegister output = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
+    }
 
     case Primitive::kPrimLong:
       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1756,7 +1952,9 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1769,25 +1967,30 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 
@@ -2295,92 +2498,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
-  Location second = locations->InAt(1);
-
-  if (is_long) {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ rolq(first_reg, second_reg);
-      } else {
-        __ rorq(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
-      if (is_left) {
-        __ rolq(first_reg, imm);
-      } else {
-        __ rorq(first_reg, imm);
-      }
-    }
-  } else {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ roll(first_reg, second_reg);
-      } else {
-        __ rorl(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-      if (is_left) {
-        __ roll(first_reg, imm);
-      } else {
-        __ rorl(first_reg, imm);
-      }
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2390,6 +2507,10 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2bb769a..956de2c 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -27,7 +26,7 @@
 /**
  * Fixture class for the LICM tests.
  */
-class LICMTest : public testing::Test {
+class LICMTest : public CommonCompilerTest {
  public:
   LICMTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -70,16 +69,16 @@
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     loop_header_->AddInstruction(new (&allocator_) HIf(parameter_));
     loop_body_->AddInstruction(new (&allocator_) HGoto());
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
   }
 
   // Performs LICM optimizations (after proper set up).
   void PerformLICM() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM licm(graph_, side_effects);
-    licm.Run();
+    LICM(graph_, side_effects).Run();
   }
 
   // General building fields.
@@ -169,10 +168,10 @@
 
   // Populate the loop with instructions: set/get array with different types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, constant_, constant_, Primitive::kPrimInt, 0);
+      parameter_, constant_, constant_, Primitive::kPrimShort, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -187,10 +186,10 @@
 
   // Populate the loop with instructions: set/get array with same types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, get_array, constant_, Primitive::kPrimLong, 0);
+      parameter_, get_array, constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index a059766..ed275b1 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -29,13 +29,12 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "pretty_printer.h"
-#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LinearizeTest : public CommonCompilerTest {};
+
 template <size_t number_of_blocks>
 static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) {
   ArenaPool pool;
@@ -46,7 +45,7 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
@@ -60,7 +59,7 @@
   }
 }
 
-TEST(LinearizeTest, CFG1) {
+TEST_F(LinearizeTest, CFG1) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -85,7 +84,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG2) {
+TEST_F(LinearizeTest, CFG2) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -110,7 +109,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG3) {
+TEST_F(LinearizeTest, CFG3) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -137,7 +136,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG4) {
+TEST_F(LinearizeTest, CFG4) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -167,7 +166,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG5) {
+TEST_F(LinearizeTest, CFG5) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -197,7 +196,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG6) {
+TEST_F(LinearizeTest, CFG6) {
   //            Block0
   //              |
   //            Block1
@@ -223,7 +222,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG7) {
+TEST_F(LinearizeTest, CFG7) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 7f67560..926f939 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -27,10 +27,10 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LiveRangesTest : public CommonCompilerTest {};
+
 static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
   HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph);
@@ -39,13 +39,13 @@
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   return graph;
 }
 
-TEST(LiveRangesTest, CFG1) {
+TEST_F(LiveRangesTest, CFG1) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -83,7 +83,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG2) {
+TEST_F(LiveRangesTest, CFG2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -131,7 +131,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG3) {
+TEST_F(LiveRangesTest, CFG3) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -204,7 +204,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop1) {
+TEST_F(LiveRangesTest, Loop1) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -284,7 +284,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop2) {
+TEST_F(LiveRangesTest, Loop2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -360,7 +360,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG4) {
+TEST_F(LiveRangesTest, CFG4) {
   /*
    * Test the following snippet:
    *  var a = 0;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 9d7d0b6..7736eed 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -27,10 +27,10 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LivenessTest : public CommonCompilerTest {};
+
 static void DumpBitVector(BitVector* vector,
                           std::ostream& buffer,
                           size_t count,
@@ -51,7 +51,7 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
@@ -75,7 +75,7 @@
   ASSERT_STREQ(expected, buffer.str().c_str());
 }
 
-TEST(LivenessTest, CFG1) {
+TEST_F(LivenessTest, CFG1) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -98,7 +98,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG2) {
+TEST_F(LivenessTest, CFG2) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -120,7 +120,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG3) {
+TEST_F(LivenessTest, CFG3) {
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (000)\n"
@@ -149,7 +149,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG4) {
+TEST_F(LivenessTest, CFG4) {
   // var a;
   // if (0 == 0) {
   //   a = 5;
@@ -197,7 +197,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG5) {
+TEST_F(LivenessTest, CFG5) {
   // var a = 0;
   // if (0 == 0) {
   // } else {
@@ -242,7 +242,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop1) {
+TEST_F(LivenessTest, Loop1) {
   // Simple loop with one preheader and one back edge.
   // var a = 0;
   // while (a == a) {
@@ -288,7 +288,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop3) {
+TEST_F(LivenessTest, Loop3) {
   // Test that the returned value stays live in a preceding loop.
   // var a = 0;
   // while (a == a) {
@@ -335,7 +335,7 @@
 }
 
 
-TEST(LivenessTest, Loop4) {
+TEST_F(LivenessTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   // var a = 0;
@@ -387,7 +387,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop5) {
+TEST_F(LivenessTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
@@ -443,7 +443,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop6) {
+TEST_F(LivenessTest, Loop6) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2)
   const char* expected =
@@ -494,7 +494,7 @@
 }
 
 
-TEST(LivenessTest, Loop7) {
+TEST_F(LivenessTest, Loop7) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2, phi in block 6)
   const char* expected =
@@ -548,7 +548,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop8) {
+TEST_F(LivenessTest, Loop8) {
   // var a = 0;
   // while (a == a) {
   //   a = a + a;
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 389ada7..adde004 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -335,16 +335,24 @@
     return true;
   }
 
-  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) {
-    ReferenceInfo* ref_info = FindReferenceInfoOf(ref);
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
     if (ref_info == nullptr) {
       size_t pos = ref_info_array_.size();
-      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos);
+      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
       ref_info_array_.push_back(ref_info);
     }
     return ref_info;
   }
 
+  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+    if (instruction->GetType() != Primitive::kPrimNot) {
+      return;
+    }
+    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+    GetOrCreateReferenceInfo(instruction);
+  }
+
   HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
                                         size_t offset,
                                         HInstruction* index,
@@ -378,6 +386,7 @@
 
   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
     VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
@@ -387,6 +396,7 @@
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
     VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
@@ -399,6 +409,7 @@
 
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
     VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitArraySet(HArraySet* instruction) OVERRIDE {
@@ -408,7 +419,23 @@
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
     // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
-    GetOrCreateReferenceInfo(new_instance);
+    CreateReferenceInfoForReferenceType(new_instance);
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 926bc156..bb0b545 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -198,10 +198,38 @@
   }
 }
 
-void HGraph::TransformToSsa() {
-  DCHECK(!reverse_post_order_.empty());
-  SsaBuilder ssa_builder(this);
-  ssa_builder.BuildSsa();
+BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+  BuildDominatorTree();
+
+  // The SSA builder requires loops to all be natural. Specifically, the dead phi
+  // elimination phase checks the consistency of the graph when doing a post-order
+  // visit for eliminating dead phis: a dead phi can only have loop header phi
+  // users remaining when being visited.
+  BuildSsaResult result = AnalyzeNaturalLoops();
+  if (result != kBuildSsaSuccess) {
+    return result;
+  }
+
+  // Precompute per-block try membership before entering the SSA builder,
+  // which needs the information to build catch block phis from values of
+  // locals at throwing instructions inside try blocks.
+  ComputeTryBlockInformation();
+
+  // Create the inexact Object reference type and store it in the HGraph.
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  inexact_object_rti_ = ReferenceTypeInfo::Create(
+      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+      /* is_exact */ false);
+
+  // Tranforms graph to SSA form.
+  result = SsaBuilder(this, handles).BuildSsa();
+  if (result != kBuildSsaSuccess) {
+    return result;
+  }
+
+  in_ssa_form_ = true;
+  return kBuildSsaSuccess;
 }
 
 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -410,7 +438,7 @@
   }
 }
 
-bool HGraph::AnalyzeNaturalLoops() const {
+BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
   // Order does not matter.
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
@@ -418,16 +446,16 @@
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
-        return false;
+        return kBuildSsaFailThrowCatchLoop;
       }
       HLoopInformation* info = block->GetLoopInformation();
       if (!info->Populate()) {
         // Abort if the loop is non natural. We currently bailout in such cases.
-        return false;
+        return kBuildSsaFailNonNaturalLoop;
       }
     }
   }
-  return true;
+  return kBuildSsaSuccess;
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
@@ -446,8 +474,13 @@
   // id and/or any invariants the graph is assuming when adding new instructions.
   if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) {
     cached_null_constant_ = new (arena_) HNullConstant(dex_pc);
+    cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_);
     InsertConstant(cached_null_constant_);
   }
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid());
+  }
   return cached_null_constant_;
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3e38e9f..55e436f0 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -98,6 +98,13 @@
   kCondAE,  // >=
 };
 
+enum BuildSsaResult {
+  kBuildSsaFailNonNaturalLoop,
+  kBuildSsaFailThrowCatchLoop,
+  kBuildSsaFailAmbiguousArrayGet,
+  kBuildSsaSuccess,
+};
+
 class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -143,6 +150,122 @@
   DISALLOW_COPY_AND_ASSIGN(HInstructionList);
 };
 
+class ReferenceTypeInfo : ValueObject {
+ public:
+  typedef Handle<mirror::Class> TypeHandle;
+
+  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
+    // The constructor will check that the type_handle is valid.
+    return ReferenceTypeInfo(type_handle, is_exact);
+  }
+
+  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
+
+  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return handle.GetReference() != nullptr;
+  }
+
+  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return IsValidHandle(type_handle_);
+  }
+
+  bool IsExact() const { return is_exact_; }
+
+  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsObjectClass();
+  }
+
+  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsStringClass();
+  }
+
+  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+  }
+
+  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsInterface();
+  }
+
+  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass();
+  }
+
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    if (!rti.IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());
+  }
+
+  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
+
+  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  // Returns true if the type information provide the same amount of details.
+  // Note that it does not mean that the instructions have the same actual type
+  // (because the type can be the result of a merge).
+  bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsValid() && !rti.IsValid()) {
+      // Invalid types are equal.
+      return true;
+    }
+    if (!IsValid() || !rti.IsValid()) {
+      // One is valid, the other not.
+      return false;
+    }
+    return IsExact() == rti.IsExact()
+        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+  }
+
+ private:
+  ReferenceTypeInfo();
+  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
+
+  // The class of the object.
+  TypeHandle type_handle_;
+  // Whether or not the type is exact or a superclass of the actual type.
+  // Whether or not we have any information about this type.
+  bool is_exact_;
+};
+
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
 // Control-flow graph of a method. Contains a list of basic blocks.
 class HGraph : public ArenaObject<kArenaAllocGraph> {
  public:
@@ -179,7 +302,8 @@
         cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
-        cached_current_method_(nullptr) {
+        cached_current_method_(nullptr),
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -197,36 +321,23 @@
 
   void AddBlock(HBasicBlock* block);
 
-  // Try building the SSA form of this graph, with dominance computation and loop
-  // recognition. Returns whether it was successful in doing all these steps.
-  bool TryBuildingSsa() {
-    BuildDominatorTree();
-    // The SSA builder requires loops to all be natural. Specifically, the dead phi
-    // elimination phase checks the consistency of the graph when doing a post-order
-    // visit for eliminating dead phis: a dead phi can only have loop header phi
-    // users remaining when being visited.
-    if (!AnalyzeNaturalLoops()) return false;
-    // Precompute per-block try membership before entering the SSA builder,
-    // which needs the information to build catch block phis from values of
-    // locals at throwing instructions inside try blocks.
-    ComputeTryBlockInformation();
-    TransformToSsa();
-    in_ssa_form_ = true;
-    return true;
-  }
+  // Try building the SSA form of this graph, with dominance computation and
+  // loop recognition. Returns a code specifying that it was successful or the
+  // reason for failure.
+  BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles);
 
   void ComputeDominanceInformation();
   void ClearDominanceInformation();
 
   void BuildDominatorTree();
-  void TransformToSsa();
   void SimplifyCFG();
   void SimplifyCatchBlocks();
 
-  // Analyze all natural loops in this graph. Returns false if one
-  // loop is not natural, that is the header does not dominate the
-  // back edge.
-  bool AnalyzeNaturalLoops() const;
+  // Analyze all natural loops in this graph. Returns a code specifying that it
+  // was successful or the reason for failure. The method will fail if a loop
+  // is not natural, that is the header does not dominate a back edge, or if it
+  // is a throw-catch loop, i.e. the header is a catch block.
+  BuildSsaResult AnalyzeNaturalLoops() const;
 
   // Iterate over blocks to compute try block membership. Needs reverse post
   // order and loop information.
@@ -487,6 +598,10 @@
   // (such as when the superclass could not be found).
   ArtMethod* art_method_;
 
+  // Keep the RTI of inexact Object to avoid having to pass stack handle
+  // collection pointer to passes which may create NullConstant.
+  ReferenceTypeInfo inexact_object_rti_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1082,6 +1197,7 @@
   M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Ror, BinaryOperation)                                               \
   M(Shl, BinaryOperation)                                               \
   M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
@@ -1673,122 +1789,6 @@
   DISALLOW_COPY_AND_ASSIGN(HEnvironment);
 };
 
-class ReferenceTypeInfo : ValueObject {
- public:
-  typedef Handle<mirror::Class> TypeHandle;
-
-  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
-    // The constructor will check that the type_handle is valid.
-    return ReferenceTypeInfo(type_handle, is_exact);
-  }
-
-  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
-
-  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
-    return handle.GetReference() != nullptr;
-  }
-
-  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsValidHandle(type_handle_);
-  }
-
-  bool IsExact() const { return is_exact_; }
-
-  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsObjectClass();
-  }
-
-  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsStringClass();
-  }
-
-  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
-  }
-
-  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsInterface();
-  }
-
-  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass();
-  }
-
-  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    if (!rti.IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
-        rti.GetTypeHandle()->GetComponentType());
-  }
-
-  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
-
-  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    DCHECK(rti.IsValid());
-    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    DCHECK(rti.IsValid());
-    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
-        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  // Returns true if the type information provide the same amount of details.
-  // Note that it does not mean that the instructions have the same actual type
-  // (because the type can be the result of a merge).
-  bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!IsValid() && !rti.IsValid()) {
-      // Invalid types are equal.
-      return true;
-    }
-    if (!IsValid() || !rti.IsValid()) {
-      // One is valid, the other not.
-      return false;
-    }
-    return IsExact() == rti.IsExact()
-        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
-  }
-
- private:
-  ReferenceTypeInfo();
-  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
-
-  // The class of the object.
-  TypeHandle type_handle_;
-  // Whether or not the type is exact or a superclass of the actual type.
-  // Whether or not we have any information about this type.
-  bool is_exact_;
-};
-
-std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
-
 class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
   HInstruction(SideEffects side_effects, uint32_t dex_pc)
@@ -4198,6 +4198,44 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
+class HRor : public HBinaryOperation {
+ public:
+  HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
+    : HBinaryOperation(result_type, value, distance) {}
+
+  template <typename T, typename U, typename V>
+  T Compute(T x, U y, V max_shift_value) const {
+    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
+                  "V is not the unsigned integer type corresponding to T");
+    V ux = static_cast<V>(x);
+    if ((y & max_shift_value) == 0) {
+      return static_cast<T>(ux);
+    } else {
+      const V reg_bits = sizeof(T) * 8;
+      return static_cast<T>(ux >> (y & max_shift_value)) |
+                           (x << (reg_bits - (y & max_shift_value)));
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Ror);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HRor);
+};
+
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
 class HParameterValue : public HExpression<0> {
@@ -4378,7 +4416,16 @@
   void RemoveInputAt(size_t index);
 
   Primitive::Type GetType() const OVERRIDE { return type_; }
-  void SetType(Primitive::Type type) { type_ = type; }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(type_ == new_type ||
+           (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (type_ == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    type_ = new_type;
+  }
 
   bool CanBeNull() const OVERRIDE { return can_be_null_; }
   void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
@@ -4618,7 +4665,21 @@
     return false;
   }
 
-  void SetType(Primitive::Type type) { type_ = type; }
+  bool IsEquivalentOf(HArrayGet* other) const {
+    bool result = (GetDexPc() == other->GetDexPc());
+    if (kIsDebugBuild && result) {
+      DCHECK_EQ(GetBlock(), other->GetBlock());
+      DCHECK_EQ(GetArray(), other->GetArray());
+      DCHECK_EQ(GetIndex(), other->GetIndex());
+      if (Primitive::IsIntOrLongType(GetType())) {
+        DCHECK(Primitive::IsFloatingPointType(other->GetType()));
+      } else {
+        DCHECK(Primitive::IsFloatingPointType(GetType()));
+        DCHECK(Primitive::IsIntOrLongType(other->GetType()));
+      }
+    }
+    return result;
+  }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4925,9 +4986,13 @@
 
 class HLoadString : public HExpression<1> {
  public:
-  HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc)
+  HLoadString(HCurrentMethod* current_method,
+              uint32_t string_index,
+              uint32_t dex_pc,
+              bool is_in_dex_cache)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
-        string_index_(string_index) {
+        string_index_(string_index),
+        is_in_dex_cache_(is_in_dex_cache) {
     SetRawInputAt(0, current_method);
   }
 
@@ -4945,6 +5010,7 @@
   bool NeedsEnvironment() const OVERRIDE { return false; }
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; }
   bool CanBeNull() const OVERRIDE { return false; }
+  bool IsInDexCache() const { return is_in_dex_cache_; }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -4954,6 +5020,7 @@
 
  private:
   const uint32_t string_index_;
+  const bool is_in_dex_cache_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index e843935..18405f2 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 
+#include "nodes.h"
+
 namespace art {
 
 class HArm64DataProcWithShifterOp : public HExpression<2> {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 831b626..ba43518 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -501,11 +501,8 @@
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
                              const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  ScopedThreadSuspension sts(soa.Self(), kNative);
-
+                             PassObserver* pass_observer,
+                             StackHandleScopeCollection* handles) {
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -522,29 +519,23 @@
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
-  ReferenceTypePropagation* type_propagation =
-      new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_types");
-  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_bce");
-  InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
+  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations1[] = {
     intrinsics,
+    sharpening,
     fold1,
     simplify1,
-    type_propagation,
-    sharpening,
     dce1,
-    simplify2
   };
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
+  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
     // BooleanSimplifier depends on the InstructionSimplifier removing
@@ -557,13 +548,13 @@
     induction,
     bce,
     fold3,  // evaluates code generated by dynamic bce
-    simplify3,
+    simplify2,
     lse,
     dce2,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
-    simplify4,
+    simplify3,
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
@@ -768,14 +759,29 @@
   }
 
   VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
+
   if (run_optimizations_) {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScopeCollection handles(soa.Self());
+    ScopedThreadSuspension sts(soa.Self(), kNative);
+
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      if (!graph->TryBuildingSsa()) {
-        // We could not transform the graph to SSA, bailout.
-        LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
-            << ": it contains a non natural loop";
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+      BuildSsaResult result = graph->TryBuildingSsa(&handles);
+      if (result != kBuildSsaSuccess) {
+        switch (result) {
+          case kBuildSsaFailNonNaturalLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop);
+            break;
+          case kBuildSsaFailThrowCatchLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+            break;
+          case kBuildSsaFailAmbiguousArrayGet:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayGet);
+            break;
+          case kBuildSsaSuccess:
+            UNREACHABLE();
+        }
         pass_observer.SetGraphInBadState();
         return nullptr;
       }
@@ -786,7 +792,8 @@
                      compiler_driver,
                      compilation_stats_.get(),
                      dex_compilation_unit,
-                     &pass_observer);
+                     &pass_observer,
+                     &handles);
     codegen->CompileOptimized(code_allocator);
   } else {
     codegen->CompileBaseline(code_allocator);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6296eed..4713514 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,9 @@
   kRemovedDeadInstruction,
   kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
-  kNotCompiledCannotBuildSSA,
+  kNotCompiledNonNaturalLoop,
+  kNotCompiledThrowCatchLoop,
+  kNotCompiledAmbiguousArrayGet,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -104,7 +106,9 @@
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
       case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
-      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break;
+      case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
+      case kNotCompiledAmbiguousArrayGet : name = "NotCompiledAmbiguousArrayGet"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
       case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
       case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 350f0b1..af3a005 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -19,9 +19,13 @@
 
 #include "nodes.h"
 #include "builder.h"
+#include "common_compiler_test.h"
 #include "compiler/dex/pass_manager.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
 #include "gtest/gtest.h"
@@ -42,7 +46,6 @@
 #define FIVE_REGISTERS_CODE_ITEM(...)  N_REGISTERS_CODE_ITEM(5, __VA_ARGS__)
 #define SIX_REGISTERS_CODE_ITEM(...)   N_REGISTERS_CODE_ITEM(6, __VA_ARGS__)
 
-
 LiveInterval* BuildInterval(const size_t ranges[][2],
                             size_t number_of_ranges,
                             ArenaAllocator* allocator,
@@ -111,6 +114,12 @@
   return instruction->GetBlock() == nullptr;
 }
 
+inline void TransformToSsa(HGraph* graph) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
deleted file mode 100644
index bde54ee..0000000
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "primitive_type_propagation.h"
-
-#include "nodes.h"
-#include "ssa_builder.h"
-
-namespace art {
-
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
-  // We trust the verifier has already done the necessary checking.
-  switch (existing) {
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimNot:
-      return existing;
-    default:
-      // Phis are initialized with a void type, so if we are asked
-      // to merge with a void type, we should use the existing one.
-      return new_type == Primitive::kPrimVoid
-          ? existing
-          : HPhi::ToPhiType(new_type);
-  }
-}
-
-// Re-compute and update the type of the instruction. Returns
-// whether or not the type was changed.
-bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
-  DCHECK(phi->IsLive());
-  Primitive::Type existing = phi->GetType();
-
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    Primitive::Type input_type = phi->InputAt(i)->GetType();
-    new_type = MergeTypes(new_type, input_type);
-  }
-  phi->SetType(new_type);
-
-  if (new_type == Primitive::kPrimDouble
-      || new_type == Primitive::kPrimFloat
-      || new_type == Primitive::kPrimNot) {
-    // If the phi is of floating point type, we need to update its inputs to that
-    // type. For inputs that are phis, we need to recompute their types.
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
-      if (input->GetType() != new_type) {
-        HInstruction* equivalent = (new_type == Primitive::kPrimNot)
-            ? SsaBuilder::GetReferenceTypeEquivalent(input)
-            : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
-        phi->ReplaceInput(equivalent, i);
-        if (equivalent->IsPhi()) {
-          AddToWorklist(equivalent->AsPhi());
-        } else if (equivalent == input) {
-          // The input has changed its type. It can be an input of other phis,
-          // so we need to put phi users in the work list.
-          AddDependentInstructionsToWorklist(equivalent);
-        }
-      }
-    }
-  }
-
-  return existing != new_type;
-}
-
-void PrimitiveTypePropagation::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
-void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  if (block->IsLoopHeader()) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        AddToWorklist(phi);
-      }
-    }
-  } else {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      // Eagerly compute the type of the phi, for quicker convergence. Note
-      // that we don't need to add users to the worklist because we are
-      // doing a reverse post-order visit, therefore either the phi users are
-      // non-loop phi and will be visited later in the visit, or are loop-phis,
-      // and they are already in the work list.
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void PrimitiveTypePropagation::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    if (UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
deleted file mode 100644
index 212fcfc..0000000
--- a/compiler/optimizing/primitive_type_propagation.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-
-#include "base/arena_containers.h"
-#include "nodes.h"
-
-namespace art {
-
-// Compute and propagate primitive types of phis in the graph.
-class PrimitiveTypePropagation : public ValueObject {
- public:
-  explicit PrimitiveTypePropagation(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HInstruction* instruction);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index fea903d..94a297c 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -40,7 +40,6 @@
       throwable_class_handle_(throwable_class_handle),
       worklist_(worklist) {}
 
-  void VisitNullConstant(HNullConstant* null_constant) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
@@ -71,8 +70,6 @@
   ReferenceTypeInfo::TypeHandle string_class_handle_;
   ReferenceTypeInfo::TypeHandle throwable_class_handle_;
   ArenaVector<HInstruction*>* worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
@@ -171,9 +168,13 @@
   ScopedObjectAccess soa(Thread::Current());
   for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
     for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-        fn(instr);
+      HPhi* phi = it.Current()->AsPhi();
+      // Note that the graph may contain dead phis when run from the SsaBuilder.
+      // Skip those as they might have a type conflict and will be removed anyway.
+      if (phi->IsLive() &&
+          phi->GetType() == Primitive::kPrimNot &&
+          !phi->GetReferenceTypeInfo().IsValid()) {
+        fn(phi);
       }
     }
     for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
@@ -376,6 +377,75 @@
   }
 }
 
+// Returns true if one of the patterns below has been recognized. If so, the
+// InstanceOf instruction together with the true branch of `ifInstruction` will
+// be returned using the out parameters.
+// Recognized patterns:
+//   (1) patterns equivalent to `if (obj instanceof X)`
+//     (a) InstanceOf -> Equal to 1 -> If
+//     (b) InstanceOf -> NotEqual to 0 -> If
+//     (c) InstanceOf -> If
+//   (2) patterns equivalent to `if (!(obj instanceof X))`
+//     (a) InstanceOf -> Equal to 0 -> If
+//     (b) InstanceOf -> NotEqual to 1 -> If
+//     (c) InstanceOf -> BooleanNot -> If
+static bool MatchIfInstanceOf(HIf* ifInstruction,
+                              /* out */ HInstanceOf** instanceOf,
+                              /* out */ HBasicBlock** trueBranch) {
+  HInstruction* input = ifInstruction->InputAt(0);
+
+  if (input->IsEqual()) {
+    HInstruction* rhs = input->AsEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsOne()) {
+          // Case (1a)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2a)
+          DCHECK(rhs->AsIntConstant()->IsZero());
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsNotEqual()) {
+    HInstruction* rhs = input->AsNotEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsZero()) {
+          // Case (1b)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2b)
+          DCHECK(rhs->AsIntConstant()->IsOne());
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsInstanceOf()) {
+    // Case (1c)
+    *instanceOf = input->AsInstanceOf();
+    *trueBranch = ifInstruction->IfTrueSuccessor();
+    return true;
+  } else if (input->IsBooleanNot()) {
+    HInstruction* not_input = input->InputAt(0);
+    if (not_input->IsInstanceOf()) {
+      // Case (2c)
+      *instanceOf = not_input->AsInstanceOf();
+      *trueBranch = ifInstruction->IfFalseSuccessor();
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // Detects if `block` is the True block for the pattern
 // `if (x instanceof ClassX) { }`
 // If that's the case insert an HBoundType instruction to bound the type of `x`
@@ -385,22 +455,11 @@
   if (ifInstruction == nullptr) {
     return;
   }
-  HInstruction* ifInput = ifInstruction->InputAt(0);
-  HInstruction* instanceOf = nullptr;
-  HBasicBlock* instanceOfTrueBlock = nullptr;
 
-  // The instruction simplifier has transformed:
-  //   - `if (a instanceof A)` into an HIf with an HInstanceOf input
-  //   - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn
-  //     has an HInstanceOf input)
-  // So we should not see the usual HEqual here.
-  if (ifInput->IsInstanceOf()) {
-    instanceOf = ifInput;
-    instanceOfTrueBlock = ifInstruction->IfTrueSuccessor();
-  } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) {
-    instanceOf = ifInput->InputAt(0);
-    instanceOfTrueBlock = ifInstruction->IfFalseSuccessor();
-  } else {
+  // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns.
+  HInstanceOf* instanceOf = nullptr;
+  HBasicBlock* instanceOfTrueBlock = nullptr;
+  if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) {
     return;
   }
 
@@ -505,13 +564,6 @@
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
-void RTPVisitor::VisitNullConstant(HNullConstant* instr) {
-  // TODO: The null constant could be bound contextually (e.g. based on return statements)
-  // to a more precise type.
-  instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
-}
-
 void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
@@ -523,7 +575,11 @@
 static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true);
+  if (dex_cache == nullptr) {
+    // Dex cache could not be found. This should only happen during gtests.
+    return nullptr;
+  }
   // Get type from dex cache assuming it was populated by the verifier.
   return dex_cache->GetResolvedType(type_idx);
 }
@@ -540,17 +596,24 @@
 
 void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
                                            const FieldInfo& info) {
-  // The field index is unknown only during tests.
-  if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) {
+  if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
-  // TODO: There are certain cases where we can't resolve the field.
-  // b/21914925 is open to keep track of a repro case for this issue.
-  mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>();
+  mirror::Class* klass = nullptr;
+
+  // The field index is unknown only during tests.
+  if (info.GetFieldIndex() != kUnknownFieldIndex) {
+    ClassLinker* cl = Runtime::Current()->GetClassLinker();
+    ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+    // TODO: There are certain cases where we can't resolve the field.
+    // b/21914925 is open to keep track of a repro case for this issue.
+    if (field != nullptr) {
+      klass = field->GetType<false>();
+    }
+  }
+
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
@@ -666,7 +729,7 @@
 }
 
 void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
-  if (phi->GetType() != Primitive::kPrimNot) {
+  if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) {
     return;
   }
 
@@ -824,6 +887,8 @@
 // NullConstant inputs are ignored during merging as they do not provide any useful information.
 // If all the inputs are NullConstants then the type of the phi will be set to Object.
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
+  DCHECK(instr->IsLive());
+
   size_t input_count = instr->InputCount();
   size_t first_input_index_not_null = 0;
   while (first_input_index_not_null < input_count &&
@@ -868,7 +933,7 @@
 // Re-computes and updates the nullability of the instruction. Returns whether or
 // not the nullability was changed.
 bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
-  DCHECK(instr->IsPhi()
+  DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive())
       || instr->IsBoundType()
       || instr->IsNullCheck()
       || instr->IsArrayGet());
@@ -916,7 +981,7 @@
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
   for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
-    if (user->IsPhi()
+    if ((user->IsPhi() && user->AsPhi()->IsLive())
        || user->IsBoundType()
        || user->IsNullCheck()
        || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) {
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 080f970..b900ed0 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -28,13 +28,13 @@
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
+class RegisterAllocatorTest : public CommonCompilerTest {};
+
 static bool Check(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -42,7 +42,7 @@
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -57,7 +57,7 @@
  * Unit testing of RegisterAllocator::ValidateIntervals. Register allocator
  * tests are based on this validation method.
  */
-TEST(RegisterAllocatorTest, ValidateIntervals) {
+TEST_F(RegisterAllocatorTest, ValidateIntervals) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -146,7 +146,7 @@
   }
 }
 
-TEST(RegisterAllocatorTest, CFG1) {
+TEST_F(RegisterAllocatorTest, CFG1) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -166,7 +166,7 @@
   ASSERT_TRUE(Check(data));
 }
 
-TEST(RegisterAllocatorTest, Loop1) {
+TEST_F(RegisterAllocatorTest, Loop1) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -205,7 +205,7 @@
   ASSERT_TRUE(Check(data));
 }
 
-TEST(RegisterAllocatorTest, Loop2) {
+TEST_F(RegisterAllocatorTest, Loop2) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -259,11 +259,11 @@
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   return graph;
 }
 
-TEST(RegisterAllocatorTest, Loop3) {
+TEST_F(RegisterAllocatorTest, Loop3) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -326,7 +326,7 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
-TEST(RegisterAllocatorTest, FirstRegisterUse) {
+TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
@@ -366,7 +366,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST(RegisterAllocatorTest, DeadPhi) {
+TEST_F(RegisterAllocatorTest, DeadPhi) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -407,7 +407,7 @@
  * that share the same register. It should split the interval it is currently
  * allocating for at the minimum lifetime position between the two inactive intervals.
  */
-TEST(RegisterAllocatorTest, FreeUntil) {
+TEST_F(RegisterAllocatorTest, FreeUntil) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
@@ -539,7 +539,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, PhiHint) {
+TEST_F(RegisterAllocatorTest, PhiHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -658,7 +658,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -726,7 +726,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -795,7 +795,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -819,7 +819,7 @@
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
-TEST(RegisterAllocatorTest, SpillInactive) {
+TEST_F(RegisterAllocatorTest, SpillInactive) {
   ArenaPool pool;
 
   // Create a synthesized graph to please the register_allocator and
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 9e6cfbe..9e869e1 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -17,214 +17,11 @@
 #include "ssa_builder.h"
 
 #include "nodes.h"
-#include "primitive_type_propagation.h"
+#include "reference_type_propagation.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-// Returns whether this is a loop header phi which was eagerly created but later
-// found inconsistent due to the vreg being undefined in one of its predecessors.
-// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination.
-static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
-  return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
-}
-
-/**
- * A debuggable application may require to reviving phis, to ensure their
- * associated DEX register is available to a debugger. This class implements
- * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It
- * also makes sure that phis with incompatible input types are not revived
- * (statement (b) of the SsaBuilder).
- *
- * This phase must be run after detecting dead phis through the
- * DeadPhiElimination phase, and before deleting the dead phis.
- */
-class DeadPhiHandling : public ValueObject {
- public:
-  explicit DeadPhiHandling(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HPhi* phi);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
-};
-
-static bool HasConflictingEquivalent(HPhi* phi) {
-  if (phi->GetNext() == nullptr) {
-    return false;
-  }
-  HPhi* next = phi->GetNext()->AsPhi();
-  if (next->GetRegNumber() == phi->GetRegNumber()) {
-    if (next->GetType() == Primitive::kPrimVoid) {
-      // We only get a void type for an equivalent phi we processed and found out
-      // it was conflicting.
-      return true;
-    } else {
-      // Go to the next phi, in case it is also an equivalent.
-      return HasConflictingEquivalent(next);
-    }
-  }
-  return false;
-}
-
-bool DeadPhiHandling::UpdateType(HPhi* phi) {
-  if (phi->IsDead()) {
-    // Phi was rendered dead while waiting in the worklist because it was replaced
-    // with an equivalent.
-    return false;
-  }
-
-  Primitive::Type existing = phi->GetType();
-
-  bool conflict = false;
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
-    if (input->IsPhi() && input->AsPhi()->IsDead()) {
-      // We are doing a reverse post order visit of the graph, reviving
-      // phis that have environment uses and updating their types. If an
-      // input is a phi, and it is dead (because its input types are
-      // conflicting), this phi must be marked dead as well.
-      conflict = true;
-      break;
-    }
-    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
-
-    // The only acceptable transitions are:
-    // - From void to typed: first time we update the type of this phi.
-    // - From int to reference (or reference to int): the phi has to change
-    //   to reference type. If the integer input cannot be converted to a
-    //   reference input, the phi will remain dead.
-    if (new_type == Primitive::kPrimVoid) {
-      new_type = input_type;
-    } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
-      if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
-        // If we already asked for an equivalent of the input phi, but that equivalent
-        // ended up conflicting, make this phi conflicting too.
-        conflict = true;
-        break;
-      }
-      HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
-      if (equivalent == nullptr) {
-        conflict = true;
-        break;
-      }
-      phi->ReplaceInput(equivalent, i);
-      if (equivalent->IsPhi()) {
-        DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
-        // We created a new phi, but that phi has the same inputs as the old phi. We
-        // add it to the worklist to ensure its inputs can also be converted to reference.
-        // If not, it will remain dead, and the algorithm will make the current phi dead
-        // as well.
-        equivalent->AsPhi()->SetLive();
-        AddToWorklist(equivalent->AsPhi());
-      }
-    } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
-      new_type = Primitive::kPrimNot;
-      // Start over, we may request reference equivalents for the inputs of the phi.
-      i = -1;
-    } else if (new_type != input_type) {
-      conflict = true;
-      break;
-    }
-  }
-
-  if (conflict) {
-    phi->SetType(Primitive::kPrimVoid);
-    phi->SetDead();
-    return true;
-  } else if (existing == new_type) {
-    return false;
-  }
-
-  DCHECK(phi->IsLive());
-  phi->SetType(new_type);
-
-  // There might exist a `new_type` equivalent of `phi` already. In that case,
-  // we replace the equivalent with the, now live, `phi`.
-  HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType();
-  if (equivalent != nullptr) {
-    // There cannot be more than two equivalents with the same type.
-    DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr);
-    // If doing fix-point iteration, the equivalent might be in `worklist_`.
-    // Setting it dead will make UpdateType skip it.
-    equivalent->SetDead();
-    equivalent->ReplaceWith(phi);
-  }
-
-  return true;
-}
-
-void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
-  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->AsPhi();
-    if (IsUndefinedLoopHeaderPhi(phi)) {
-      DCHECK(phi->IsDead());
-      continue;
-    }
-    if (phi->IsDead() && phi->HasEnvironmentUses()) {
-      phi->SetLive();
-      if (block->IsLoopHeader()) {
-        // Loop phis must have a type to guarantee convergence of the algorithm.
-        DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
-        AddToWorklist(phi);
-      } else {
-        // Because we are doing a reverse post order visit, all inputs of
-        // this phi have been visited and therefore had their (initial) type set.
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void DeadPhiHandling::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    // Note that the same equivalent phi can be added multiple times in the work list, if
-    // used by multiple phis. The first call to `UpdateType` will know whether the phi is
-    // dead or live.
-    if (instruction->IsLive() && UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void DeadPhiHandling::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && !phi->IsDead()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-void DeadPhiHandling::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
 void SsaBuilder::SetLoopHeaderPhiInputs() {
   for (size_t i = loop_headers_.size(); i > 0; --i) {
     HBasicBlock* block = loop_headers_[i - 1];
@@ -285,10 +82,11 @@
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
       if (next != nullptr) {
-        // Make sure we do not replace a live phi with a dead phi. A live phi has been
-        // handled by the type propagation phase, unlike a dead phi.
+        // Make sure we do not replace a live phi with a dead phi. A live phi
+        // has been handled by the type propagation phase, unlike a dead phi.
         if (next->IsLive()) {
           phi->ReplaceWith(next);
+          phi->SetDead();
         } else {
           next->ReplaceWith(phi);
         }
@@ -300,64 +98,7 @@
   }
 }
 
-void SsaBuilder::BuildSsa() {
-  // 1) Visit in reverse post order. We need to have all predecessors of a block visited
-  // (with the exception of loops) in order to create the right environment for that
-  // block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop phis.
-  SetLoopHeaderPhiInputs();
-
-  // 3) Mark dead phis. This will mark phis that are only used by environments:
-  // at the DEX level, the type of these phis does not need to be consistent, but
-  // our code generator will complain if the inputs of a phi do not have the same
-  // type. The marking allows the type propagation to know which phis it needs
-  // to handle. We mark but do not eliminate: the elimination will be done in
-  // step 9).
-  SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph());
-  dead_phis_for_type_propagation.MarkDeadPhis();
-
-  // 4) Propagate types of phis. At this point, phis are typed void in the general
-  // case, or float/double/reference when we created an equivalent phi. So we
-  // need to propagate the types across phis to give them a correct type.
-  PrimitiveTypePropagation type_propagation(GetGraph());
-  type_propagation.Run();
-
-  // 5) When creating equivalent phis we copy the inputs of the original phi which
-  // may be improperly typed. This was fixed during the type propagation in 4) but
-  // as a result we may end up with two equivalent phis with the same type for
-  // the same dex register. This pass cleans them up.
-  EquivalentPhisCleanup();
-
-  // 6) Mark dead phis again. Step 4) may have introduced new phis.
-  // Step 5) might enable the death of new phis.
-  SsaDeadPhiElimination dead_phis(GetGraph());
-  dead_phis.MarkDeadPhis();
-
-  // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
-  // Note that we cannot do this phase before type propagation, otherwise
-  // we could get rid of phi equivalents, whose presence is a requirement for the
-  // type propagation phase. Note that this is to satisfy statement (a) of the
-  // SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination redundant_phi(GetGraph());
-  redundant_phi.Run();
-
-  // 8) Fix the type for null constants which are part of an equality comparison.
-  // We need to do this after redundant phi elimination, to ensure the only cases
-  // that we can see are reference comparison against 0. The redundant phi
-  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
-  // or HNotEqual.
-  FixNullConstantType();
-
-  // 9) Make sure environments use the right phi "equivalent": a phi marked dead
-  // can have a phi equivalent that is not dead. We must therefore update
-  // all environment uses of the dead phi to use its equivalent. Note that there
-  // can be multiple phis for the same Dex register that are live (for example
-  // when merging constants), in which case it is OK for the environments
-  // to just reference one.
+void SsaBuilder::FixEnvironmentPhis() {
   for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
@@ -378,24 +119,345 @@
       phi->ReplaceWith(next);
     }
   }
+}
 
-  // 10) Deal with phis to guarantee liveness of phis in case of a debuggable
-  // application. This is for satisfying statement (c) of the SsaBuilder
-  // (see ssa_builder.h).
-  if (GetGraph()->IsDebuggable()) {
-    DeadPhiHandling dead_phi_handler(GetGraph());
-    dead_phi_handler.Run();
+static void AddDependentInstructionsToWorklist(HInstruction* instruction,
+                                               ArenaVector<HPhi*>* worklist) {
+  // If `instruction` is a dead phi, type conflict was just identified. All its
+  // live phi users, and transitively users of those users, therefore need to be
+  // marked dead/conflicting too, so we add them to the worklist. Otherwise we
+  // add users whose type does not match and needs to be updated.
+  bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead();
+  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+    HInstruction* user = it.Current()->GetUser();
+    if (user->IsPhi() && user->AsPhi()->IsLive()) {
+      if (add_all_live_phis || user->GetType() != instruction->GetType()) {
+        worklist->push_back(user->AsPhi());
+      }
+    }
+  }
+}
+
+// Find a candidate primitive type for `phi` by merging the type of its inputs.
+// Return false if conflict is identified.
+static bool TypePhiFromInputs(HPhi* phi) {
+  Primitive::Type common_type = phi->GetType();
+
+  for (HInputIterator it(phi); !it.Done(); it.Advance()) {
+    HInstruction* input = it.Current();
+    if (input->IsPhi() && input->AsPhi()->IsDead()) {
+      // Phis are constructed live so if an input is a dead phi, it must have
+      // been made dead due to type conflict. Mark this phi conflicting too.
+      return false;
+    }
+
+    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
+    if (common_type == input_type) {
+      // No change in type.
+    } else if (Primitive::ComponentSize(common_type) != Primitive::ComponentSize(input_type)) {
+      // Types are of different sizes, e.g. int vs. long. Must be a conflict.
+      return false;
+    } else if (Primitive::IsIntegralType(common_type)) {
+      // Previous inputs were integral, this one is not but is of the same size.
+      // This does not imply conflict since some bytecode instruction types are
+      // ambiguous. TypeInputsOfPhi will either type them or detect a conflict.
+      DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot);
+      common_type = input_type;
+    } else if (Primitive::IsIntegralType(input_type)) {
+      // Input is integral, common type is not. Same as in the previous case, if
+      // there is a conflict, it will be detected during TypeInputsOfPhi.
+      DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot);
+    } else {
+      // Combining float and reference types. Clearly a conflict.
+      DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) ||
+             (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat));
+      return false;
+    }
   }
 
-  // 11) Now that the right phis are used for the environments, and we
-  // have potentially revive dead phis in case of a debuggable application,
-  // we can eliminate phis we do not need. Regardless of the debuggable status,
-  // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
-  // as well as for the code generation, which does not deal with phis of conflicting
-  // input types.
-  dead_phis.EliminateDeadPhis();
+  // We have found a candidate type for the phi. Set it and return true. We may
+  // still discover conflict whilst typing the individual inputs in TypeInputsOfPhi.
+  phi->SetType(common_type);
+  return true;
+}
 
-  // 12) Clear locals.
+// Replace inputs of `phi` to match its type. Return false if conflict is identified.
+bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  Primitive::Type common_type = phi->GetType();
+  if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) {
+    // Phi either contains only other untyped phis (common_type == kPrimVoid),
+    // or `common_type` is integral and we do not need to retype ambiguous inputs
+    // because they are always constructed with the integral type candidate.
+    if (kIsDebugBuild) {
+      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+        HInstruction* input = phi->InputAt(i);
+        if (common_type == Primitive::kPrimVoid) {
+          DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
+        } else {
+          DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) ||
+                 HPhi::ToPhiType(input->GetType()) == common_type);
+        }
+      }
+    }
+    // Inputs did not need to be replaced, hence no conflict. Report success.
+    return true;
+  } else {
+    DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      HInstruction* input = phi->InputAt(i);
+      if (input->GetType() != common_type) {
+        // Input type does not match phi's type. Try to retype the input or
+        // generate a suitably typed equivalent.
+        HInstruction* equivalent = (common_type == Primitive::kPrimNot)
+            ? GetReferenceTypeEquivalent(input)
+            : GetFloatOrDoubleEquivalent(input, common_type);
+        if (equivalent == nullptr) {
+          // Input could not be typed. Report conflict.
+          return false;
+        }
+        // Make sure the input did not change its type and we do not need to
+        // update its users.
+        DCHECK_NE(input, equivalent);
+
+        phi->ReplaceInput(equivalent, i);
+        if (equivalent->IsPhi()) {
+          worklist->push_back(equivalent->AsPhi());
+        }
+      }
+    }
+    // All inputs either matched the type of the phi or we successfully replaced
+    // them with a suitable equivalent. Report success.
+    return true;
+  }
+}
+
+// Attempt to set the primitive type of `phi` to match its inputs. Return whether
+// it was changed by the algorithm or not.
+bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  DCHECK(phi->IsLive());
+  Primitive::Type original_type = phi->GetType();
+
+  // Try to type the phi in two stages:
+  // (1) find a candidate type for the phi by merging types of all its inputs,
+  // (2) try to type the phi's inputs to that candidate type.
+  // Either of these stages may detect a type conflict and fail, in which case
+  // we immediately abort.
+  if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) {
+    // Conflict detected. Mark the phi dead and return true because it changed.
+    phi->SetDead();
+    return true;
+  }
+
+  // Return true if the type of the phi has changed.
+  return phi->GetType() != original_type;
+}
+
+void SsaBuilder::RunPrimitiveTypePropagation() {
+  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsLoopHeader()) {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          worklist.push_back(phi);
+        }
+      }
+    } else {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        // Eagerly compute the type of the phi, for quicker convergence. Note
+        // that we don't need to add users to the worklist because we are
+        // doing a reverse post-order visit, therefore either the phi users are
+        // non-loop phi and will be visited later in the visit, or are loop-phis,
+        // and they are already in the work list.
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          UpdatePrimitiveType(phi, &worklist);
+        }
+      }
+    }
+  }
+
+  ProcessPrimitiveTypePropagationWorklist(&worklist);
+  EquivalentPhisCleanup();
+}
+
+void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) {
+  // Process worklist
+  while (!worklist->empty()) {
+    HPhi* phi = worklist->back();
+    worklist->pop_back();
+    // The phi could have been made dead as a result of conflicts while in the
+    // worklist. If it is now dead, there is no point in updating its type.
+    if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) {
+      AddDependentInstructionsToWorklist(phi, worklist);
+    }
+  }
+}
+
+static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  HArrayGet* next = aget->GetNext()->AsArrayGet();
+  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+}
+
+static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr);
+
+  HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet(
+      aget->GetArray(),
+      aget->GetIndex(),
+      type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble,
+      aget->GetDexPc());
+  aget->GetBlock()->InsertInstructionAfter(equivalent, aget);
+  return equivalent;
+}
+
+// Returns true if the array input of `aget` is either of type int[] or long[].
+// Should only be called on ArrayGets with ambiguous type (int/float, long/double)
+// on arrays which were typed to an array class by RTP.
+static bool IsArrayGetOnIntegralArray(HArrayGet* aget) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ReferenceTypeInfo array_type = aget->GetArray()->GetReferenceTypeInfo();
+  DCHECK(array_type.IsPrimitiveArrayClass());
+  ReferenceTypeInfo::TypeHandle array_type_handle = array_type.GetTypeHandle();
+
+  bool is_integral_type;
+  if (Primitive::Is64BitType(aget->GetType())) {
+    is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveLong();
+    DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveDouble());
+  } else {
+    is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveInt();
+    DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveFloat());
+  }
+  return is_integral_type;
+}
+
+bool SsaBuilder::FixAmbiguousArrayGets() {
+  if (ambiguous_agets_.empty()) {
+    return true;
+  }
+
+  // The wrong ArrayGet equivalent may still have Phi uses coming from ArraySet
+  // uses (because they are untyped) and environment uses (if --debuggable).
+  // After resolving all ambiguous ArrayGets, we will re-run primitive type
+  // propagation on the Phis which need to be updated.
+  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    for (HArrayGet* aget_int : ambiguous_agets_) {
+      if (!aget_int->GetArray()->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
+        return false;
+      }
+
+      HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int);
+      if (IsArrayGetOnIntegralArray(aget_int)) {
+        if (aget_float != nullptr) {
+          // There is a float/double equivalent. We must replace it and re-run
+          // primitive type propagation on all dependent instructions.
+          aget_float->ReplaceWith(aget_int);
+          aget_float->GetBlock()->RemoveInstruction(aget_float);
+          AddDependentInstructionsToWorklist(aget_int, &worklist);
+        }
+      } else {
+        if (aget_float == nullptr) {
+          // This is a float/double ArrayGet but there were no typed uses which
+          // would create the typed equivalent. Create it now.
+          aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int);
+        }
+        // Replace the original int/long instruction. Note that it may have phi
+        // uses, environment uses, as well as real uses (from untyped ArraySets).
+        // We need to re-run primitive type propagation on its dependent instructions.
+        aget_int->ReplaceWith(aget_float);
+        aget_int->GetBlock()->RemoveInstruction(aget_int);
+        AddDependentInstructionsToWorklist(aget_float, &worklist);
+      }
+    }
+  }
+
+  // Set a flag stating that types of ArrayGets have been resolved. This is used
+  // by GetFloatOrDoubleEquivalentOfArrayGet to report conflict.
+  agets_fixed_ = true;
+
+  if (!worklist.empty()) {
+    ProcessPrimitiveTypePropagationWorklist(&worklist);
+    EquivalentPhisCleanup();
+  }
+
+  return true;
+}
+
+BuildSsaResult SsaBuilder::BuildSsa() {
+  // 1) Visit in reverse post order. We need to have all predecessors of a block
+  // visited (with the exception of loops) in order to create the right environment
+  // for that block. For loops, we create phis whose inputs will be set in 2).
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+
+  // 2) Set inputs of loop header phis.
+  SetLoopHeaderPhiInputs();
+
+  // 3) Propagate types of phis. At this point, phis are typed void in the general
+  // case, or float/double/reference if we created an equivalent phi. So we need
+  // to propagate the types across phis to give them a correct type. If a type
+  // conflict is detected in this stage, the phi is marked dead.
+  RunPrimitiveTypePropagation();
+
+  // 4) Now that the correct primitive types have been assigned, we can get rid
+  // of redundant phis. Note that we cannot do this phase before type propagation,
+  // otherwise we could get rid of phi equivalents, whose presence is a requirement
+  // for the type propagation phase. Note that this is to satisfy statement (a)
+  // of the SsaBuilder (see ssa_builder.h).
+  SsaRedundantPhiElimination(GetGraph()).Run();
+
+  // 5) Fix the type for null constants which are part of an equality comparison.
+  // We need to do this after redundant phi elimination, to ensure the only cases
+  // that we can see are reference comparison against 0. The redundant phi
+  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
+  // or HNotEqual.
+  FixNullConstantType();
+
+  // 6) Compute type of reference type instructions. The pass assumes that
+  // NullConstant has been fixed up.
+  ReferenceTypePropagation(GetGraph(), handles_).Run();
+
+  // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
+  // or long/double). Now that RTP computed the type of the array input, the
+  // ambiguity can be resolved and the correct equivalent kept.
+  if (!FixAmbiguousArrayGets()) {
+    return kBuildSsaFailAmbiguousArrayGet;
+  }
+
+  // 8) Mark dead phis. This will mark phis which are not used by instructions
+  // or other live phis. If compiling as debuggable code, phis will also be kept
+  // live if they have an environment use.
+  SsaDeadPhiElimination dead_phi_elimimation(GetGraph());
+  dead_phi_elimimation.MarkDeadPhis();
+
+  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // can have a phi equivalent that is not dead. In that case we have to replace
+  // it with the live equivalent because deoptimization and try/catch rely on
+  // environments containing values of all live vregs at that point. Note that
+  // there can be multiple phis for the same Dex register that are live
+  // (for example when merging constants), in which case it is okay for the
+  // environments to just reference one.
+  FixEnvironmentPhis();
+
+  // 10) Now that the right phis are used for the environments, we can eliminate
+  // phis we do not need. Regardless of the debuggable status, this phase is
+  /// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
+  // as for the code generation, which does not deal with phis of conflicting
+  // input types.
+  dead_phi_elimimation.EliminateDeadPhis();
+
+  // 11) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -404,6 +466,8 @@
       current->GetBlock()->RemoveInstruction(current);
     }
   }
+
+  return kBuildSsaSuccess;
 }
 
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
@@ -591,6 +655,8 @@
  * phi with a floating point / reference type.
  */
 HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one.";
+
   // We place the floating point /reference phi next to this phi.
   HInstruction* next = phi->GetNext();
   if (next != nullptr
@@ -606,35 +672,50 @@
     ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
-      // but the type propagation phase will fix it.
+      // Copy the inputs. Note that the graph may not be correctly typed
+      // by doing this copy, but the type propagation phase will fix it.
       new_phi->SetRawInputAt(i, phi->InputAt(i));
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+    DCHECK(new_phi->IsLive());
     return new_phi;
   } else {
+    // An existing equivalent was found. If it is dead, conflict was previously
+    // identified and we return nullptr instead.
     HPhi* next_phi = next->AsPhi();
     DCHECK_EQ(next_phi->GetType(), type);
-    if (next_phi->IsDead()) {
-      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
-      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
-      // cannot revive undefined loop header phis because they cannot have uses.
-      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
-      next_phi->SetLive();
-    }
-    return next_phi;
+    return next_phi->IsLive() ? next_phi : nullptr;
   }
 }
 
-HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                     HInstruction* value,
-                                                     Primitive::Type type) {
+HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  DCHECK(Primitive::IsIntegralType(aget->GetType()));
+
+  if (!Primitive::IsIntOrLongType(aget->GetType())) {
+    // Cannot type boolean, char, byte, short to float/double.
+    return nullptr;
+  }
+
+  DCHECK(ContainsElement(ambiguous_agets_, aget));
+  if (agets_fixed_) {
+    // This used to be an ambiguous ArrayGet but its type has been resolved to
+    // int/long. Requesting a float/double equivalent should lead to a conflict.
+    if (kIsDebugBuild) {
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(IsArrayGetOnIntegralArray(aget));
+    }
+    return nullptr;
+  } else {
+    // This is an ambiguous ArrayGet which has not been resolved yet. Return an
+    // equivalent float/double instruction to use until it is resolved.
+    HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget);
+    return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent;
+  }
+}
+
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) {
   if (value->IsArrayGet()) {
-    // The verifier has checked that values in arrays cannot be used for both
-    // floating point and non-floating point operations. It is therefore safe to just
-    // change the type of the operation.
-    value->AsArrayGet()->SetType(type);
-    return value;
+    return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet());
   } else if (value->IsLongConstant()) {
     return GetDoubleEquivalent(value->AsLongConstant());
   } else if (value->IsIntConstant()) {
@@ -642,12 +723,7 @@
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
   } else {
-    // For other instructions, we assume the verifier has checked that the dex format is correctly
-    // typed and the value in a dex register will not be used for both floating point and
-    // non-floating point operations. So the only reason an instruction would want a floating
-    // point equivalent is for an unused phi that will be removed by the dead phi elimination phase.
-    DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")";
-    return value;
+    return nullptr;
   }
 }
 
@@ -662,15 +738,17 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
+  Primitive::Type load_type = load->GetType();
   HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
   // If the operation requests a specific type, we make sure its input is of that type.
-  if (load->GetType() != value->GetType()) {
-    if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
-    } else if (load->GetType() == Primitive::kPrimNot) {
+  if (load_type != value->GetType()) {
+    if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
+      value = GetFloatOrDoubleEquivalent(value, load_type);
+    } else if (load_type == Primitive::kPrimNot) {
       value = GetReferenceTypeEquivalent(value);
     }
   }
+
   load->ReplaceWith(value);
   load->GetBlock()->RemoveInstruction(load);
 }
@@ -760,4 +838,13 @@
   temp->GetBlock()->RemoveInstruction(temp);
 }
 
+void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(!Primitive::IsFloatingPointType(type));
+  if (Primitive::IsIntOrLongType(type)) {
+    ambiguous_agets_.push_back(aget);
+  }
+  VisitInstruction(aget);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index dcce5e4..ed6f5ca 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,17 +49,20 @@
  */
 class SsaBuilder : public HGraphVisitor {
  public:
-  explicit SsaBuilder(HGraph* graph)
+  explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
       : HGraphVisitor(graph),
+        handles_(handles),
+        agets_fixed_(false),
         current_locals_(nullptr),
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
     loop_headers_.reserve(kDefaultNumberOfLoops);
   }
 
-  void BuildSsa();
+  BuildSsaResult BuildSsa();
 
   // Returns locals vector for `block`. If it is a catch block, the vector will be
   // prepopulated with catch phis for vregs which are defined in `current_locals_`.
@@ -71,23 +74,38 @@
   void VisitStoreLocal(HStoreLocal* store);
   void VisitInstruction(HInstruction* instruction);
   void VisitTemporary(HTemporary* instruction);
-
-  static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                  HInstruction* instruction,
-                                                  Primitive::Type type);
-
-  static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+  void VisitArrayGet(HArrayGet* aget);
 
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
   void SetLoopHeaderPhiInputs();
+  void FixEnvironmentPhis();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
+  void RunPrimitiveTypePropagation();
 
-  static HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
-  static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
-  static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  // Attempts to resolve types of aget and aget-wide instructions from reference
+  // type information on the input array. Returns false if the type of the array
+  // is unknown.
+  bool FixAmbiguousArrayGets();
+
+  bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
+
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
+  HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
+  HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
+  HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
+
+  StackHandleScopeCollection* const handles_;
+
+  // True if types of ambiguous ArrayGets have been resolved.
+  bool agets_fixed_;
 
   // Locals for the current block being visited.
   ArenaVector<HInstruction*>* current_locals_;
@@ -96,6 +114,8 @@
   // over these blocks to set the inputs of their phis.
   ArenaVector<HBasicBlock*> loop_headers_;
 
+  ArenaVector<HArrayGet*> ambiguous_agets_;
+
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index a3219dc..63aba88 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -40,15 +40,17 @@
         continue;
       }
 
-      bool has_non_phi_use = false;
-      for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        if (!use_it.Current()->GetUser()->IsPhi()) {
-          has_non_phi_use = true;
-          break;
+      bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses());
+      if (!keep_alive) {
+        for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
+          if (!use_it.Current()->GetUser()->IsPhi()) {
+            keep_alive = true;
+            break;
+          }
         }
       }
 
-      if (has_non_phi_use) {
+      if (keep_alive) {
         worklist_.push_back(phi);
       } else {
         phi->SetDead();
@@ -94,8 +96,8 @@
           for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
                use_it.Advance()) {
             HInstruction* user = use_it.Current()->GetUser();
-            DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
-            DCHECK(user->AsPhi()->IsDead()) << user->GetId();
+            DCHECK(user->IsLoopHeaderPhi());
+            DCHECK(user->AsPhi()->IsDead());
           }
         }
         // Remove the phi from use lists of its inputs.
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 024278f..d2885a8 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+class SsaTest : public CommonCompilerTest {};
+
 class SsaPrettyPrinter : public HPrettyPrinter {
  public:
   explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
@@ -83,11 +85,10 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  graph->BuildDominatorTree();
+  TransformToSsa(graph);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TransformToSsa();
   ReNumberInstructions(graph);
 
   // Test that phis had their type set.
@@ -103,7 +104,7 @@
   ASSERT_STREQ(expected, printer.str().c_str());
 }
 
-TEST(SsaTest, CFG1) {
+TEST_F(SsaTest, CFG1) {
   // Test that we get rid of loads and stores.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -131,7 +132,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG2) {
+TEST_F(SsaTest, CFG2) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when there is only code in the else branch.
   const char* expected =
@@ -162,7 +163,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG3) {
+TEST_F(SsaTest, CFG3) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when both branches update a local.
   const char* expected =
@@ -195,7 +196,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop1) {
+TEST_F(SsaTest, Loop1) {
   // Test that we create a phi for an initialized local at entry of a loop.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -228,7 +229,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop2) {
+TEST_F(SsaTest, Loop2) {
   // Simple loop with one preheader and one back edge.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -258,7 +259,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop3) {
+TEST_F(SsaTest, Loop3) {
   // Test that a local not yet defined at the entry of a loop is handled properly.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -290,7 +291,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop4) {
+TEST_F(SsaTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   const char* expected =
@@ -325,7 +326,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop5) {
+TEST_F(SsaTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   const char* expected =
@@ -367,7 +368,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop6) {
+TEST_F(SsaTest, Loop6) {
   // Test a loop with one preheader and two back edges (e.g. continue).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -406,7 +407,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop7) {
+TEST_F(SsaTest, Loop7) {
   // Test a loop with one preheader, one back edge, and two exit edges (e.g. break).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -448,7 +449,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, DeadLocal) {
+TEST_F(SsaTest, DeadLocal) {
   // Test that we correctly handle a local not being used.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -466,7 +467,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, LocalInIf) {
+TEST_F(SsaTest, LocalInIf) {
   // Test that we do not create a phi in the join block when one predecessor
   // does not update the local.
   const char* expected =
@@ -496,7 +497,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, MultiplePredecessors) {
+TEST_F(SsaTest, MultiplePredecessors) {
   // Test that we do not create a phi when one predecessor
   // does not update the local.
   const char* expected =
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 9fb2245..66d5ac3 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -25,12 +25,12 @@
 namespace art {
 
 // Create code that will invoke the function held in thread local storage.
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa,
+                                               EntryPointCallingConvention abi,
+                                               ThreadOffset<4> entry_point_offset);
+const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa,
+                                               EntryPointCallingConvention abi,
+                                               ThreadOffset<8> entry_point_offset);
 
 }  // namespace art
 
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index f10acf9..77f8d6c 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -57,18 +57,18 @@
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2aa4085..6fae8e4 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -71,6 +71,7 @@
 #include "oat_writer.h"
 #include "os.h"
 #include "runtime.h"
+#include "runtime_options.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
@@ -208,6 +209,11 @@
   UsageError("  --image=<file.art>: specifies the output image filename.");
   UsageError("      Example: --image=/system/framework/boot.art");
   UsageError("");
+  UsageError("  --image-format=(uncompressed|lz4):");
+  UsageError("      Which format to store the image.");
+  UsageError("      Example: --image-format=lz4");
+  UsageError("      Default: uncompressed");
+  UsageError("");
   UsageError("  --image-classes=<classname-file>: specifies classes to include in an image.");
   UsageError("      Example: --image=frameworks/base/preloaded-classes");
   UsageError("");
@@ -490,6 +496,7 @@
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
       image_classes_filename_(nullptr),
+      image_storage_mode_(ImageHeader::kStorageModeUncompressed),
       compiled_classes_zip_filename_(nullptr),
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
@@ -621,6 +628,19 @@
     }
   }
 
+  void ParseImageFormat(const StringPiece& option) {
+    const StringPiece substr("--image-format=");
+    DCHECK(option.starts_with(substr));
+    const StringPiece format_str = option.substr(substr.length());
+    if (format_str == "lz4") {
+      image_storage_mode_ = ImageHeader::kStorageModeLZ4;
+    } else if (format_str == "uncompressed") {
+      image_storage_mode_ = ImageHeader::kStorageModeUncompressed;
+    } else {
+      Usage("Unknown image format: %s", format_str.data());
+    }
+  }
+
   void ProcessOptions(ParserOptions* parser_options) {
     boot_image_ = !image_filename_.empty();
     app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
@@ -667,15 +687,14 @@
       parser_options->boot_image_filename += "/framework/boot.art";
     }
     if (!parser_options->boot_image_filename.empty()) {
-      boot_image_option_ += "-Ximage:";
-      boot_image_option_ += parser_options->boot_image_filename;
+      boot_image_filename_ = parser_options->boot_image_filename;
     }
 
     if (image_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--image-classes should only be used with --image");
     }
 
-    if (image_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (image_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--image-classes should not be used with --boot-image");
     }
 
@@ -687,7 +706,7 @@
       Usage("--compiled-classes should only be used with --image");
     }
 
-    if (compiled_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (compiled_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--compiled-classes should not be used with --boot-image");
     }
 
@@ -719,7 +738,7 @@
       Usage("--zip-location should be supplied with --zip-fd");
     }
 
-    if (boot_image_option_.empty()) {
+    if (boot_image_filename_.empty()) {
       if (image_base_ == 0) {
         Usage("Non-zero --base not specified");
       }
@@ -877,6 +896,8 @@
         image_classes_filename_ = option.substr(strlen("--image-classes=")).data();
       } else if (option.starts_with("--image-classes-zip=")) {
         image_classes_zip_filename_ = option.substr(strlen("--image-classes-zip=")).data();
+      } else if (option.starts_with("--image-format=")) {
+        ParseImageFormat(option);
       } else if (option.starts_with("--compiled-classes=")) {
         compiled_classes_filename_ = option.substr(strlen("--compiled-classes=")).data();
       } else if (option.starts_with("--compiled-classes-zip=")) {
@@ -1014,20 +1035,10 @@
   // boot class path.
   bool Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
-    RuntimeOptions runtime_options;
     art::MemMap::Init();  // For ZipEntry::ExtractToMemMap.
-    if (boot_image_option_.empty()) {
-      std::string boot_class_path = "-Xbootclasspath:";
-      boot_class_path += Join(dex_filenames_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path, nullptr));
-      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
-      boot_class_path_locations += Join(dex_locations_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
-    } else {
-      runtime_options.push_back(std::make_pair(boot_image_option_, nullptr));
-    }
-    for (size_t i = 0; i < runtime_args_.size(); i++) {
-      runtime_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+
+    if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
+      return false;
     }
 
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
@@ -1037,23 +1048,15 @@
         IsBootImage() ?
             CompilerCallbacks::CallbackMode::kCompileBootImage :
             CompilerCallbacks::CallbackMode::kCompileApp));
-    runtime_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-    runtime_options.push_back(
-        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
 
-    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
-    // we don't want fallback mode, it will abort as we do not push a boot classpath (it might
-    // have been stripped in preopting, anyways).
-    if (!IsBootImage()) {
-      runtime_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    RuntimeArgumentMap runtime_options;
+    if (!PrepareRuntimeOptions(&runtime_options)) {
+      return false;
     }
-    // Disable libsigchain. We don't don't need it during compilation and it prevents us
-    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
-    runtime_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
 
     {
       TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
-      if (!CreateRuntime(runtime_options)) {
+      if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
@@ -1068,66 +1071,8 @@
     // Whilst we're in native take the opportunity to initialize well known classes.
     WellKnownClasses::Init(self->GetJniEnv());
 
-    // If --image-classes was specified, calculate the full list of classes to include in the image
-    if (image_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (image_classes_zip_filename_ != nullptr) {
-        image_classes_.reset(ReadImageClassesFromZip(image_classes_zip_filename_,
-                                                     image_classes_filename_,
-                                                     &error_msg));
-      } else {
-        image_classes_.reset(ReadImageClassesFromFile(image_classes_filename_));
-      }
-      if (image_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of image classes from '" << image_classes_filename_ <<
-            "': " << error_msg;
-        return false;
-      }
-    } else if (IsBootImage()) {
-      image_classes_.reset(new std::unordered_set<std::string>);
-    }
-    // If --compiled-classes was specified, calculate the full list of classes to compile in the
-    // image.
-    if (compiled_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_classes_zip_filename_ != nullptr) {
-        compiled_classes_.reset(ReadImageClassesFromZip(compiled_classes_zip_filename_,
-                                                        compiled_classes_filename_,
-                                                        &error_msg));
-      } else {
-        compiled_classes_.reset(ReadImageClassesFromFile(compiled_classes_filename_));
-      }
-      if (compiled_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled classes from '"
-                   << compiled_classes_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_classes_.reset(nullptr);  // By default compile everything.
-    }
-    // If --compiled-methods was specified, read the methods to compile from the given file(s).
-    if (compiled_methods_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_methods_zip_filename_ != nullptr) {
-        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
-                                                          compiled_methods_filename_,
-                                                          nullptr,            // No post-processing.
-                                                          &error_msg));
-      } else {
-        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
-                                                           nullptr));         // No post-processing.
-      }
-      if (compiled_methods_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled methods from '"
-            << compiled_methods_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_methods_.reset(nullptr);  // By default compile everything.
-    }
-
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_option_.empty()) {
+    if (boot_image_filename_.empty()) {
       dex_files_ = class_linker->GetBootClassPath();
     } else {
       TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
@@ -1164,22 +1109,7 @@
 
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
-        for (size_t i = 0; i < dex_files_.size(); ++i) {
-          const DexFile* dex_file = dex_files_[i];
-          std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
-                                                 getpid(), i));
-          std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
-          if (tmp_file.get() == nullptr) {
-            PLOG(ERROR) << "Failed to open file " << tmp_file_name
-                << ". Try: adb shell chmod 777 /data/local/tmp";
-            continue;
-          }
-          // This is just dumping files for debugging. Ignore errors, and leave remnants.
-          UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
-          UNUSED(tmp_file->Flush());
-          UNUSED(tmp_file->Close());
-          LOG(INFO) << "Wrote input to " << tmp_file_name;
-        }
+        SaveDexInput();
       }
     }
     // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
@@ -1238,16 +1168,13 @@
     jobject class_path_class_loader = nullptr;
     Thread* self = Thread::Current();
 
-    if (!boot_image_option_.empty()) {
+    if (!boot_image_filename_.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
       ScopedObjectAccess soa(self);
 
       // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files;
-      for (auto& class_path_file : class_path_files_) {
-        class_path_files.push_back(class_path_file.get());
-      }
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
 
       // Store the classpath we have right now.
       key_value_store_->Put(OatHeader::kClassPathKey,
@@ -1432,14 +1359,9 @@
       elf_writer->EndText(text);
 
       elf_writer->SetBssSize(oat_writer->GetBssSize());
-
       elf_writer->WriteDynamicSection();
-
-      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer->GetMethodDebugInfo());
-      elf_writer->WriteDebugInfo(method_infos);
-
-      ArrayRef<const uintptr_t> patch_locations(oat_writer->GetAbsolutePatchLocations());
-      elf_writer->WritePatchLocations(patch_locations);
+      elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+      elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
       if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
@@ -1552,6 +1474,16 @@
   }
 
  private:
+  template <typename T>
+  static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
+    std::vector<T*> result;
+    result.reserve(src.size());
+    for (const std::unique_ptr<T>& t : src) {
+      result.push_back(t.get());
+    }
+    return result;
+  }
+
   static size_t OpenDexFiles(const std::vector<const char*>& dex_filenames,
                              const std::vector<const char*>& dex_locations,
                              std::vector<std::unique_ptr<const DexFile>>* dex_files) {
@@ -1612,10 +1544,138 @@
     }
   }
 
+  bool PrepareImageClasses() {
+    // If --image-classes was specified, calculate the full list of classes to include in the image.
+    if (image_classes_filename_ != nullptr) {
+      image_classes_ =
+          ReadClasses(image_classes_zip_filename_, image_classes_filename_, "image");
+      if (image_classes_ == nullptr) {
+        return false;
+      }
+    } else if (IsBootImage()) {
+      image_classes_.reset(new std::unordered_set<std::string>);
+    }
+    return true;
+  }
+
+  bool PrepareCompiledClasses() {
+    // If --compiled-classes was specified, calculate the full list of classes to compile in the
+    // image.
+    if (compiled_classes_filename_ != nullptr) {
+      compiled_classes_ =
+          ReadClasses(compiled_classes_zip_filename_, compiled_classes_filename_, "compiled");
+      if (compiled_classes_ == nullptr) {
+        return false;
+      }
+    } else {
+      compiled_classes_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  static std::unique_ptr<std::unordered_set<std::string>> ReadClasses(const char* zip_filename,
+                                                                      const char* classes_filename,
+                                                                      const char* tag) {
+    std::unique_ptr<std::unordered_set<std::string>> classes;
+    std::string error_msg;
+    if (zip_filename != nullptr) {
+      classes.reset(ReadImageClassesFromZip(zip_filename, classes_filename, &error_msg));
+    } else {
+      classes.reset(ReadImageClassesFromFile(classes_filename));
+    }
+    if (classes == nullptr) {
+      LOG(ERROR) << "Failed to create list of " << tag << " classes from '"
+                 << classes_filename << "': " << error_msg;
+    }
+    return classes;
+  }
+
+  bool PrepareCompiledMethods() {
+    // If --compiled-methods was specified, read the methods to compile from the given file(s).
+    if (compiled_methods_filename_ != nullptr) {
+      std::string error_msg;
+      if (compiled_methods_zip_filename_ != nullptr) {
+        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
+                                                          compiled_methods_filename_,
+                                                          nullptr,            // No post-processing.
+                                                          &error_msg));
+      } else {
+        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
+                                                           nullptr));         // No post-processing.
+      }
+      if (compiled_methods_.get() == nullptr) {
+        LOG(ERROR) << "Failed to create list of compiled methods from '"
+            << compiled_methods_filename_ << "': " << error_msg;
+        return false;
+      }
+    } else {
+      compiled_methods_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  void SaveDexInput() {
+    for (size_t i = 0; i < dex_files_.size(); ++i) {
+      const DexFile* dex_file = dex_files_[i];
+      std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
+                                             getpid(), i));
+      std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
+      if (tmp_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to open file " << tmp_file_name
+            << ". Try: adb shell chmod 777 /data/local/tmp";
+        continue;
+      }
+      // This is just dumping files for debugging. Ignore errors, and leave remnants.
+      UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
+      UNUSED(tmp_file->Flush());
+      UNUSED(tmp_file->Close());
+      LOG(INFO) << "Wrote input to " << tmp_file_name;
+    }
+  }
+
+  bool PrepareRuntimeOptions(RuntimeArgumentMap* runtime_options) {
+    RuntimeOptions raw_options;
+    if (boot_image_filename_.empty()) {
+      std::string boot_class_path = "-Xbootclasspath:";
+      boot_class_path += Join(dex_filenames_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path, nullptr));
+      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
+      boot_class_path_locations += Join(dex_locations_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
+    } else {
+      std::string boot_image_option = "-Ximage:";
+      boot_image_option += boot_image_filename_;
+      raw_options.push_back(std::make_pair(boot_image_option, nullptr));
+    }
+    for (size_t i = 0; i < runtime_args_.size(); i++) {
+      raw_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+    }
+
+    raw_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+    raw_options.push_back(
+        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
+
+    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
+    // we don't want fallback mode, it will abort as we do not push a boot classpath (it might
+    // have been stripped in preopting, anyways).
+    if (!IsBootImage()) {
+      raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    }
+    // Disable libsigchain. We don't don't need it during compilation and it prevents us
+    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
+    raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+
+    if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
+      LOG(ERROR) << "Failed to parse runtime options";
+      return false;
+    }
+    return true;
+  }
+
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(const RuntimeOptions& runtime_options)
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
-    if (!Runtime::Create(runtime_options, false)) {
+    if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
@@ -1643,7 +1703,8 @@
     image_writer_.reset(new ImageWriter(*driver_,
                                         image_base,
                                         compiler_options_->GetCompilePic(),
-                                        IsAppImage()));
+                                        IsAppImage(),
+                                        image_storage_mode_));
   }
 
   // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
@@ -1812,12 +1873,13 @@
   std::vector<const char*> dex_locations_;
   int zip_fd_;
   std::string zip_location_;
-  std::string boot_image_option_;
+  std::string boot_image_filename_;
   std::vector<const char*> runtime_args_;
   std::string image_filename_;
   uintptr_t image_base_;
   const char* image_classes_zip_filename_;
   const char* image_classes_filename_;
+  ImageHeader::StorageMode image_storage_mode_;
   const char* compiled_classes_zip_filename_;
   const char* compiled_classes_filename_;
   const char* compiled_methods_zip_filename_;
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 81fb33c..1a2f2c2 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -27,7 +27,6 @@
  * Differences between XML output and the "current.xml" file:
  * - classes in same package are not all grouped together; nothing is sorted
  * - no "deprecated" on fields and methods
- * - no "value" on fields
  * - no parameter names
  * - no generic signatures on parameters, e.g. type="java.lang.Class&lt;?&gt;"
  * - class shows declared fields and methods; does not show inherited fields
@@ -1018,9 +1017,126 @@
 }
 
 /*
+ * Dumps a string value with some escape characters.
+ */
+static void dumpEscapedString(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '\\':
+        fputs("\\\\", gOutFile);
+        break;
+      case '\"':
+        fputs("\\\"", gOutFile);
+        break;
+      case '\t':
+        fputs("\\t", gOutFile);
+        break;
+      case '\n':
+        fputs("\\n", gOutFile);
+        break;
+      case '\r':
+        fputs("\\r", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }
+  }
+}
+
+/*
+ * Dumps an XML attribute value between double-quotes.
+ */
+static void dumpXmlAttribute(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '&':
+        fputs("&amp;", gOutFile);
+        break;
+      case '<':
+        fputs("&lt;", gOutFile);
+        break;
+      case '"':
+        fputs("&quot;", gOutFile);
+        break;
+      case '\t':
+        fputs("&#x9;", gOutFile);
+        break;
+      case '\n':
+        fputs("&#xA;", gOutFile);
+        break;
+      case '\r':
+        fputs("&#xD;", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }
+  }
+}
+
+/*
+ * Dumps a value of static (class) field.
+ */
+static void dumpSFieldValue(const DexFile* pDexFile,
+                            EncodedStaticFieldValueIterator::ValueType valueType,
+                            const jvalue* pValue) {
+  switch (valueType) {
+    case EncodedStaticFieldValueIterator::kByte:
+      fprintf(gOutFile, "%" PRIu8, pValue->b);
+      break;
+    case EncodedStaticFieldValueIterator::kShort:
+      fprintf(gOutFile, "%" PRId16, pValue->s);
+      break;
+    case EncodedStaticFieldValueIterator::kChar:
+      fprintf(gOutFile, "%" PRIu16, pValue->c);
+      break;
+    case EncodedStaticFieldValueIterator::kInt:
+      fprintf(gOutFile, "%" PRId32, pValue->i);
+      break;
+    case EncodedStaticFieldValueIterator::kLong:
+      fprintf(gOutFile, "%" PRId64, pValue->j);
+      break;
+    case EncodedStaticFieldValueIterator::kFloat:
+      fprintf(gOutFile, "%f", pValue->f);
+      break;
+    case EncodedStaticFieldValueIterator::kDouble:
+      fprintf(gOutFile, "%f", pValue->d);
+      break;
+    case EncodedStaticFieldValueIterator::kString: {
+      const char* str =
+          pDexFile->GetStringData(pDexFile->GetStringId(pValue->i));
+      if (gOptions.outputFormat == OUTPUT_PLAIN) {
+        fputs("\"", gOutFile);
+        dumpEscapedString(str);
+        fputs("\"", gOutFile);
+      } else {
+        dumpXmlAttribute(str);
+      }
+      break;
+    }
+    case EncodedStaticFieldValueIterator::kNull:
+      fputs("null", gOutFile);
+      break;
+    case EncodedStaticFieldValueIterator::kBoolean:
+      fputs(pValue->z ? "true" : "false", gOutFile);
+      break;
+
+    case EncodedStaticFieldValueIterator::kAnnotation:
+    case EncodedStaticFieldValueIterator::kArray:
+    case EncodedStaticFieldValueIterator::kEnum:
+    case EncodedStaticFieldValueIterator::kField:
+    case EncodedStaticFieldValueIterator::kMethod:
+    case EncodedStaticFieldValueIterator::kType:
+    default:
+      fprintf(gOutFile, "Unexpected static field type: %d", valueType);
+  }
+}
+
+/*
  * Dumps a static (class) field.
  */
-static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
+static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i,
+                       EncodedStaticFieldValueIterator::ValueType valueType,
+                       const jvalue* pValue) {
   // Bail for anything private if export only requested.
   if (gOptions.exportsOnly && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
@@ -1037,6 +1153,11 @@
     fprintf(gOutFile, "      name          : '%s'\n", name);
     fprintf(gOutFile, "      type          : '%s'\n", typeDescriptor);
     fprintf(gOutFile, "      access        : 0x%04x (%s)\n", flags, accessStr);
+    if (pValue != nullptr) {
+      fputs("      value         : ", gOutFile);
+      dumpSFieldValue(pDexFile, valueType, pValue);
+      fputs("\n", gOutFile);
+    }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
     char *tmp = descriptorToDot(typeDescriptor);
@@ -1049,7 +1170,12 @@
     fprintf(gOutFile, " final=%s\n", quotedBool((flags & kAccFinal) != 0));
     // The "deprecated=" is not knowable w/o parsing annotations.
     fprintf(gOutFile, " visibility=%s\n", quotedVisibility(flags));
-    fprintf(gOutFile, ">\n</field>\n");
+    if (pValue != nullptr) {
+      fputs(" value=\"", gOutFile);
+      dumpSFieldValue(pDexFile, valueType, pValue);
+      fputs("\"\n", gOutFile);
+    }
+    fputs(">\n</field>\n", gOutFile);
   }
 
   free(accessStr);
@@ -1059,7 +1185,8 @@
  * Dumps an instance field.
  */
 static void dumpIField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
-  dumpSField(pDexFile, idx, flags, i);
+  dumpSField(pDexFile, idx, flags, i,
+             EncodedStaticFieldValueIterator::kByte, nullptr);
 }
 
 /*
@@ -1191,6 +1318,8 @@
       fprintf(gOutFile, " extends=\"%s\"\n", tmp);
       free(tmp);
     }
+    fprintf(gOutFile, " interface=%s\n",
+            quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
     fprintf(gOutFile, " abstract=%s\n", quotedBool((pClassDef.access_flags_ & kAccAbstract) != 0));
     fprintf(gOutFile, " static=%s\n", quotedBool((pClassDef.access_flags_ & kAccStatic) != 0));
     fprintf(gOutFile, " final=%s\n", quotedBool((pClassDef.access_flags_ & kAccFinal) != 0));
@@ -1221,10 +1350,23 @@
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Static fields     -\n");
     }
+    EncodedStaticFieldValueIterator staticFieldValues(*pDexFile, pClassDef);
     for (int i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
+      EncodedStaticFieldValueIterator::ValueType valueType =
+          EncodedStaticFieldValueIterator::kByte;
+      const jvalue* pValue = nullptr;
+      if (staticFieldValues.HasNext()) {
+        valueType = staticFieldValues.GetValueType();
+        pValue = &staticFieldValues.GetJavaValue();
+      }
       dumpSField(pDexFile, pClassData.GetMemberIndex(),
-                           pClassData.GetRawMemberAccessFlags(), i);
+                           pClassData.GetRawMemberAccessFlags(), i,
+                 valueType, pValue);
+      if (staticFieldValues.HasNext()) {
+        staticFieldValues.Next();
+      }
     }  // for
+    DCHECK(!staticFieldValues.HasNext());
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Instance fields   -\n");
     }
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 723bb17..46ab34b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -153,6 +153,12 @@
     return false;
   }
 
+  if (image_header.GetStorageMode() != ImageHeader::kStorageModeUncompressed) {
+    LOG(ERROR) << "Patchoat is not supported with compressed image files "
+               << input_image->GetPath();
+    return false;
+  }
+
   /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
   // Nothing special to do right now since the image always needs to get patched.
   // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 74cc899..993f37f 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -229,6 +229,16 @@
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
+# Keep the __jit_debug_register_code symbol as a unique symbol during ICF for architectures where
+# we use gold as the linker (arm, x86, x86_64). The symbol is used by the debuggers to detect when
+# new jit code is generated. We don't want it to be called when a different function with the same
+# (empty) body is called.
+JIT_DEBUG_REGISTER_CODE_LDFLAGS := -Wl,--keep-unique,__jit_debug_register_code
+LIBART_TARGET_LDFLAGS_arm    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86_64 := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+JIT_DEBUG_REGISTER_CODE_LDFLAGS :=
+
 LIBART_TARGET_SRC_FILES := \
   $(LIBART_COMMON_SRC_FILES) \
   jdwp/jdwp_adb.cc \
@@ -450,6 +460,8 @@
   ifeq ($$(art_target_or_host),target)
     LOCAL_CFLAGS += $$(LIBART_TARGET_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS)
+    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+      $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
   else #host
     LOCAL_CFLAGS += $$(LIBART_HOST_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS)
@@ -457,8 +469,6 @@
       LOCAL_LDFLAGS += -static
     endif
   endif
-  $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-    $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
 
   # Clang usage
   ifeq ($$(art_target_or_host),target)
@@ -491,9 +501,19 @@
   LOCAL_C_INCLUDES += art
 
   ifeq ($$(art_static_or_shared),static)
-    LOCAL_STATIC_LIBRARIES := libnativehelper libnativebridge libsigchain_dummy libbacktrace
+    LOCAL_STATIC_LIBRARIES := libnativehelper
+    LOCAL_STATIC_LIBRARIES += libnativebridge
+    LOCAL_STATIC_LIBRARIES += libnativeloader
+    LOCAL_STATIC_LIBRARIES += libsigchain_dummy
+    LOCAL_STATIC_LIBRARIES += libbacktrace
+    LOCAL_STATIC_LIBRARIES += liblz4
   else
-    LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain libbacktrace
+    LOCAL_SHARED_LIBRARIES := libnativehelper
+    LOCAL_SHARED_LIBRARIES += libnativebridge
+    LOCAL_SHARED_LIBRARIES += libnativeloader
+    LOCAL_SHARED_LIBRARIES += libsigchain
+    LOCAL_SHARED_LIBRARIES += libbacktrace
+    LOCAL_SHARED_LIBRARIES += liblz4
   endif
 
   ifeq ($$(art_target_or_host),target)
@@ -577,8 +597,14 @@
 LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
 LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
+LIBART_TARGET_LDFLAGS :=
+LIBART_TARGET_LDFLAGS_arm :=
+LIBART_TARGET_LDFLAGS_arm64 :=
+LIBART_TARGET_LDFLAGS_x86 :=
+LIBART_TARGET_LDFLAGS_x86_64 :=
+LIBART_TARGET_LDFLAGS_mips :=
+LIBART_TARGET_LDFLAGS_mips64 :=
 LIBART_TARGET_SRC_FILES :=
 LIBART_TARGET_SRC_FILES_arm :=
 LIBART_TARGET_SRC_FILES_arm64 :=
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index be33b0e..7141181 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -166,6 +166,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 63285a4..5c8ff8f 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -149,6 +149,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 74e7638..0d01ad5 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,6 +45,7 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
+      entrypoint == kQuickReadBarrierMark ||
       entrypoint == kQuickReadBarrierSlow ||
       entrypoint == kQuickReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index cba427d..51eb77f 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -274,6 +274,8 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMark = artReadBarrierMark;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierMark), "Direct C stub not marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 89f54dd..4bdb38e 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -180,6 +180,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index e200018..e593f39 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -28,6 +28,7 @@
                                             const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -93,6 +94,25 @@
   qpoints->pLockObject = art_quick_lock_object;
   qpoints->pUnlockObject = art_quick_unlock_object;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Math
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
@@ -139,6 +159,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 463c9cf..da30331 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1686,6 +1686,14 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    PUSH eax                         // pass arg1 - obj
+    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
+    addl LITERAL(4), %esp            // pop argument
+    CFI_ADJUST_CFA_OFFSET(-4)
+    ret
+END_FUNCTION art_quick_read_barrier_mark
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     PUSH edx                         // pass arg3 - offset
     PUSH ecx                         // pass arg2 - obj
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2b38c9d..0a5d14a 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -29,6 +29,7 @@
                                                    const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -98,6 +99,25 @@
   qpoints->pLockObject = art_quick_lock_object;
   qpoints->pUnlockObject = art_quick_unlock_object;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Math
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
@@ -143,6 +163,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 17d277e..883da96 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1712,6 +1712,17 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp           // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_slow
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp           // Alignment padding.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index b548dfb..c86614c 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -122,7 +122,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 169 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
index 2bd12df..51dfba8 100644
--- a/runtime/base/dchecked_vector.h
+++ b/runtime/base/dchecked_vector.h
@@ -33,7 +33,7 @@
 // but we do not use exceptions, so this accessor is deliberately hidden.
 // Note: The common pattern &v[0] used to retrieve pointer to the data is not
 // valid for an empty dchecked_vector<>. Use data() to avoid checking empty().
-template <typename T, typename Alloc>
+template <typename T, typename Alloc = std::allocator<T>>
 class dchecked_vector : private std::vector<T, Alloc> {
  private:
   // std::vector<> has a slightly different specialization for bool. We don't provide that.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0a37f26..f5085ed 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3056,10 +3056,12 @@
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
   if (!preverified) {
+    Runtime* runtime = Runtime::Current();
     verifier_failure = verifier::MethodVerifier::VerifyClass(self,
                                                              klass.Get(),
-                                                             Runtime::Current()->IsAotCompiler(),
-                                                             Runtime::Current()->IsAotCompiler(),
+                                                             runtime->GetCompilerCallbacks(),
+                                                             runtime->IsAotCompiler(),
+                                                             runtime->IsAotCompiler(),
                                                              &error_msg);
   }
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index af7b04f..a39d682 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -37,8 +37,8 @@
 
   virtual ~CompilerCallbacks() { }
 
-  virtual bool MethodVerified(verifier::MethodVerifier* verifier)
-  SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void MethodVerified(verifier::MethodVerifier* verifier)
+      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
   virtual void ClassRejected(ClassReference ref) = 0;
 
   // Return true if we should attempt to relocate to a random base address if we have not already
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index e62aa04..880d3e0 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -2246,13 +2246,48 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
-    const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
-    Handle<mirror::ClassLoader>* class_loader, ClassLinker* linker,
+    const DexFile& dex_file,
     const DexFile::ClassDef& class_def)
-    : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
-      array_size_(), pos_(-1), type_(kByte) {
-  DCHECK(dex_cache != nullptr);
-  DCHECK(class_loader != nullptr);
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      nullptr,
+                                      nullptr,
+                                      nullptr,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def)
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      dex_cache, class_loader,
+                                      linker,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def,
+    size_t pos,
+    ValueType type)
+    : dex_file_(dex_file),
+      dex_cache_(dex_cache),
+      class_loader_(class_loader),
+      linker_(linker),
+      array_size_(),
+      pos_(pos),
+      type_(type) {
   ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == nullptr) {
     array_size_ = 0;
@@ -2326,6 +2361,8 @@
 
 template<bool kTransactionActive>
 void EncodedStaticFieldValueIterator::ReadValueToField(ArtField* field) const {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
   switch (type_) {
     case kBoolean: field->SetBoolean<kTransactionActive>(field->GetDeclaringClass(), jval_.z);
         break;
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index a9f1e8d..8a3db6c 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1517,9 +1517,17 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
+  // A constructor for static tools. You cannot call
+  // ReadValueToField() for an object created by this.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  const DexFile::ClassDef& class_def);
+
+  // A constructor meant to be called from runtime code.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
                                   Handle<mirror::ClassLoader>* class_loader,
-                                  ClassLinker* linker, const DexFile::ClassDef& class_def)
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
@@ -1548,7 +1556,18 @@
     kBoolean = 0x1f
   };
 
+  ValueType GetValueType() const { return type_; }
+  const jvalue& GetJavaValue() const { return jval_; }
+
  private:
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
+                                  Handle<mirror::ClassLoader>* class_loader,
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def,
+                                  size_t pos,
+                                  ValueType type);
+
   static constexpr uint8_t kEncodedValueTypeMask = 0x1f;  // 0b11111
   static constexpr uint8_t kEncodedValueArgShift = 5;
 
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 440d696..727f4fc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -2508,11 +2508,12 @@
                                   method_access_flags);
         return false;
       }
-      // Abstract methods must be in an abstract class or interface.
+      // Abstract methods should be in an abstract class or interface.
       if ((class_access_flags & (kAccInterface | kAccAbstract)) == 0) {
-        *error_msg = StringPrintf("Method %" PRIu32 " is abstract, but the declaring class "
-                                  "is neither abstract nor an interface", method_index);
-        return false;
+        LOG(WARNING) << "Method " << PrettyMethod(method_index, *dex_file_)
+                     << " is abstract, but the declaring class is neither abstract nor an "
+                     << "interface in dex file "
+                     << dex_file_->GetLocation();
       }
     }
     // Interfaces are special.
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 27865e3..f5b68fa 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -79,11 +79,17 @@
 // functions directly.  For x86 and x86-64, compilers need a wrapper
 // assembly function, to handle mismatch in ABI.
 
+// Mark the heap reference `obj`. This entry point is used by read
+// barrier fast path implementations generated by the compiler to mark
+// an object that is referenced by a field of a gray object.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 // Read barrier entrypoint for heap references.
-// This is the read barrier slow path for instance and static fields and reference-type arrays.
-// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
-// Ideally the slow path should only take one parameter "ref".
-extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+// This is the read barrier slow path for instance and static fields
+// and reference type arrays.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+                                              mirror::Object* obj,
                                               uint32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index ee7b986..faa4747 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -86,6 +86,23 @@
   V(CmpgFloat, int32_t, float, float) \
   V(CmplDouble, int32_t, double, double) \
   V(CmplFloat, int32_t, float, float) \
+  V(Cos, double, double) \
+  V(Sin, double, double) \
+  V(Acos, double, double) \
+  V(Asin, double, double) \
+  V(Atan, double, double) \
+  V(Atan2, double, double, double) \
+  V(Cbrt, double, double) \
+  V(Cosh, double, double) \
+  V(Exp, double, double) \
+  V(Expm1, double, double) \
+  V(Hypot, double, double, double) \
+  V(Log, double, double) \
+  V(Log10, double, double) \
+  V(NextAfter, double, double, double) \
+  V(Sinh, double, double) \
+  V(Tan, double, double) \
+  V(Tanh, double, double) \
   V(Fmod, double, double, double) \
   V(L2d, double, int64_t) \
   V(Fmodf, float, float, float) \
@@ -146,6 +163,7 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
+  V(ReadBarrierMark, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 7ec5fc5..25c0bda 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -559,8 +559,11 @@
   return -1;  // failure
 }
 
-// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
-// take one parameter "ref", which is given by the fast path.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) {
+  DCHECK(kEmitCompilerReadBarrier);
+  return ReadBarrier::Mark(obj);
+}
+
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj,
                                               uint32_t offset) {
@@ -579,7 +582,6 @@
 
 extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
   DCHECK(kEmitCompilerReadBarrier);
-  // TODO: Pass a GcRootSource object as second argument to GcRoot::Read?
   return root->Read();
 }
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 8587ede..dc9f14c 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -223,7 +223,24 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgDouble, pCmpgFloat, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgFloat, pCmplDouble, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplDouble, pCmplFloat, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pFmod, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pCos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCos, pSin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSin, pAcos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExpm1, pHypot, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHypot, pLog, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog, pLog10, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog10, pNextAfter, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNextAfter, pSinh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSinh, pTan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTan, pTanh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTanh, pFmod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmod, pL2d, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pL2d, pFmodf, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmodf, pL2f, sizeof(void*));
@@ -301,7 +318,8 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMark, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMark, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index f741117..4884e66 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -113,7 +113,9 @@
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
         /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size),
         /*pointer_size*/sizeof(void*),
-        /*compile_pic*/false);
+        /*compile_pic*/false,
+        ImageHeader::kStorageModeUncompressed,
+        /*storage_size*/0u);
     return new DummyImageSpace(map.release(), live_bitmap.release());
   }
 };
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e2b2431..8f67c21 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -17,12 +17,12 @@
 #include "image_space.h"
 
 #include <dirent.h>
+#include <lz4.h>
+#include <random>
 #include <sys/statvfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#include <random>
-
 #include "art_method.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -677,11 +677,12 @@
     *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
     return nullptr;
   }
-  // Check that the file is large enough.
-  uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
-  if (image_header.GetImageSize() > image_file_size) {
-    *error_msg = StringPrintf("Image file too small for image heap: %" PRIu64 " vs. %zu.",
-                              image_file_size, image_header.GetImageSize());
+  // Check that the file is larger or equal to the header size + data size.
+  const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
+  if (image_file_size < sizeof(ImageHeader) + image_header.GetDataSize()) {
+    *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
+                              image_file_size,
+                              image_header.GetDataSize());
     return nullptr;
   }
 
@@ -697,7 +698,11 @@
   }
 
   const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-  auto end_of_bitmap = static_cast<size_t>(bitmap_section.End());
+  // The location we want to map from is the first aligned page after the end of the stored
+  // (possibly compressed) data.
+  const size_t image_bitmap_offset = RoundUp(sizeof(image_header) + image_header.GetDataSize(),
+                                             kPageSize);
+  const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
   if (end_of_bitmap != image_file_size) {
     *error_msg = StringPrintf(
         "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
@@ -706,16 +711,60 @@
   }
 
   // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
-                                                       image_header.GetImageSize(),
-                                                       PROT_READ | PROT_WRITE,
+  std::unique_ptr<MemMap> map;
+  if (image_header.GetStorageMode() == ImageHeader::kStorageModeUncompressed) {
+    map.reset(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
+                                       image_header.GetImageSize(),
+                                       PROT_READ | PROT_WRITE,
+                                       MAP_PRIVATE,
+                                       file->Fd(),
+                                       0,
+                                       /*low_4gb*/false,
+                                       /*reuse*/false,
+                                       image_filename,
+                                       error_msg));
+  } else {
+    // Reserve output and decompress into it.
+    map.reset(MemMap::MapAnonymous(image_location,
+                                   image_header.GetImageBegin(),
+                                   image_header.GetImageSize(),
+                                   PROT_READ | PROT_WRITE,
+                                   /*low_4gb*/false,
+                                   /*reuse*/false,
+                                   error_msg));
+    if (map != nullptr) {
+      const size_t stored_size = image_header.GetDataSize();
+      const size_t write_offset = sizeof(image_header);  // Skip the header.
+      std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
+                                                       PROT_READ,
                                                        MAP_PRIVATE,
                                                        file->Fd(),
-                                                       0,
+                                                       /*offset*/0,
                                                        /*low_4gb*/false,
-                                                       /*reuse*/false,
                                                        image_filename,
                                                        error_msg));
+      if (temp_map == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+      memcpy(map->Begin(), &image_header, sizeof(image_header));
+      const uint64_t start = NanoTime();
+      const size_t decompressed_size = LZ4_decompress_safe(
+          reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
+          reinterpret_cast<char*>(map->Begin()) + write_offset,
+          stored_size,
+          map->Size());
+      // TODO: VLOG(image)
+      VLOG(class_linker) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
+      if (decompressed_size + sizeof(ImageHeader) != image_header.GetImageSize()) {
+        *error_msg = StringPrintf("Decompressed size does not match expected image size %zu vs %zu",
+                                  decompressed_size + sizeof(ImageHeader),
+                                  image_header.GetImageSize());
+        return nullptr;
+      }
+    }
+  }
+
   if (map == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
@@ -723,16 +772,16 @@
   CHECK_EQ(image_header.GetImageBegin(), map->Begin());
   DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
 
-  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr,
-                                                             bitmap_section.Size(),
-                                                             PROT_READ, MAP_PRIVATE,
-                                                             file->Fd(),
-                                                             bitmap_section.Offset(),
-                                                             /*low_4gb*/false,
-                                                             /*reuse*/false,
-                                                             image_filename,
-                                                             error_msg));
-  if (image_map.get() == nullptr) {
+  std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
+                                                                    bitmap_section.Size(),
+                                                                    PROT_READ, MAP_PRIVATE,
+                                                                    file->Fd(),
+                                                                    image_bitmap_offset,
+                                                                    /*low_4gb*/false,
+                                                                    /*reuse*/false,
+                                                                    image_filename,
+                                                                    error_msg));
+  if (image_bitmap_map == nullptr) {
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
@@ -741,9 +790,11 @@
                                        bitmap_index));
   std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(
-          bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()),
+          bitmap_name,
+          image_bitmap_map.release(),
+          reinterpret_cast<uint8_t*>(map->Begin()),
           accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size())));
-  if (bitmap.get() == nullptr) {
+  if (bitmap == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
   }
diff --git a/runtime/image.cc b/runtime/image.cc
index 2eac3fb..7d2ef75 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '3', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '4', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -36,7 +36,9 @@
                          uint32_t oat_data_end,
                          uint32_t oat_file_end,
                          uint32_t pointer_size,
-                         bool compile_pic)
+                         bool compile_pic,
+                         StorageMode storage_mode,
+                         size_t data_size)
   : image_begin_(image_begin),
     image_size_(image_size),
     oat_checksum_(oat_checksum),
@@ -47,7 +49,9 @@
     patch_delta_(0),
     image_roots_(image_roots),
     pointer_size_(pointer_size),
-    compile_pic_(compile_pic) {
+    compile_pic_(compile_pic),
+    storage_mode_(storage_mode),
+    data_size_(data_size) {
   CHECK_EQ(image_begin, RoundUp(image_begin, kPageSize));
   CHECK_EQ(oat_file_begin, RoundUp(oat_file_begin, kPageSize));
   CHECK_EQ(oat_data_begin, RoundUp(oat_data_begin, kPageSize));
diff --git a/runtime/image.h b/runtime/image.h
index a16f3c9..7418f66 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -78,10 +78,27 @@
 // header of image files written by ImageWriter, read and validated by Space.
 class PACKED(4) ImageHeader {
  public:
+  enum StorageMode : uint32_t {
+    kStorageModeUncompressed,
+    kStorageModeLZ4,
+    kStorageModeCount,  // Number of elements in enum.
+  };
+  static constexpr StorageMode kDefaultStorageMode = kStorageModeUncompressed;
+
   ImageHeader()
-      : image_begin_(0U), image_size_(0U), oat_checksum_(0U), oat_file_begin_(0U),
-        oat_data_begin_(0U), oat_data_end_(0U), oat_file_end_(0U), patch_delta_(0),
-        image_roots_(0U), pointer_size_(0U), compile_pic_(0) {}
+      : image_begin_(0U),
+        image_size_(0U),
+        oat_checksum_(0U),
+        oat_file_begin_(0U),
+        oat_data_begin_(0U),
+        oat_data_end_(0U),
+        oat_file_end_(0U),
+        patch_delta_(0),
+        image_roots_(0U),
+        pointer_size_(0U),
+        compile_pic_(0),
+        storage_mode_(kDefaultStorageMode),
+        data_size_(0) {}
 
   ImageHeader(uint32_t image_begin,
               uint32_t image_size,
@@ -93,7 +110,9 @@
               uint32_t oat_data_end,
               uint32_t oat_file_end,
               uint32_t pointer_size,
-              bool compile_pic);
+              bool compile_pic,
+              StorageMode storage_mode,
+              size_t data_size);
 
   bool IsValid() const;
   const char* GetMagic() const;
@@ -194,6 +213,14 @@
     return compile_pic_ != 0;
   }
 
+  StorageMode GetStorageMode() const {
+    return storage_mode_;
+  }
+
+  uint64_t GetDataSize() const {
+    return data_size_;
+  }
+
  private:
   static const uint8_t kImageMagic[4];
   static const uint8_t kImageVersion[4];
@@ -235,12 +262,19 @@
   // Boolean (0 or 1) to denote if the image was compiled with --compile-pic option
   const uint32_t compile_pic_;
 
-  // Image sections
+  // Image section sizes/offsets correspond to the uncompressed form.
   ImageSection sections_[kSectionCount];
 
   // Image methods.
   uint64_t image_methods_[kImageMethodsCount];
 
+  // Storage method for the image, the image may be compressed.
+  StorageMode storage_mode_;
+
+  // Data size for the image data excluding the bitmap and the header. For compressed images, this
+  // is the compressed size in the file.
+  uint32_t data_size_;
+
   friend class ImageWriter;
 };
 
@@ -248,6 +282,7 @@
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageRoot& policy);
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageSections& section);
 std::ostream& operator<<(std::ostream& os, const ImageSection& section);
+std::ostream& operator<<(std::ostream& os, const ImageHeader::StorageMode& mode);
 
 }  // namespace art
 
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 7cc05f7..15f5122 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -17,6 +17,7 @@
 #include "jni_internal.h"
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
+
 #include <cutils/trace.h>
 #include <dlfcn.h>
 
@@ -31,6 +32,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
+#include "nativeloader/native_loader.h"
 #include "java_vm_ext.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
@@ -715,6 +717,7 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
+                                  jstring library_path, jstring permitted_path,
                                   std::string* error_msg) {
   error_msg->clear();
 
@@ -774,7 +777,8 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_NOW);
+  void* handle = android::OpenNativeLibrary(env, runtime_->GetTargetSdkVersion(),
+                                            path_str, class_loader, library_path, permitted_path);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 618f6fa..8559769 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -82,11 +82,11 @@
   /**
    * Loads the given shared library. 'path' is an absolute pathname.
    *
-   * Returns 'true' on success. On failure, sets 'detail' to a
+   * Returns 'true' on success. On failure, sets 'error_msg' to a
    * human-readable description of the error.
    */
   bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
-                         std::string* error_msg);
+                         jstring library_path, jstring permitted_path, std::string* error_msg);
 
   // Unload native libraries with cleared class loaders.
   void UnloadNativeLibraries()
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 92aa86e..a653440 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -188,7 +188,7 @@
 
   uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
   if (offline_profile_info_->NeedsSaving(last_update_ns)) {
-    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    VLOG(profiler) << "Initiate save profiling information to: " << filename;
     std::set<ArtMethod*> methods;
     {
       ScopedObjectAccess soa(Thread::Current());
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 4450653..7615870 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -68,7 +68,6 @@
   }
 }
 
-
 void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
   DCHECK(method != nullptr);
   const DexFile* dex_file = method->GetDexFile();
@@ -80,11 +79,25 @@
   info_it->second.insert(method->GetDexMethodIndex());
 }
 
-static int OpenOrCreateFile(const std::string& filename) {
-  // TODO(calin) allow the shared uid of the app to access the file.
-  int fd = open(filename.c_str(),
-                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
-                S_IRUSR | S_IWUSR);
+enum OpenMode {
+  READ,
+  READ_WRITE
+};
+
+static int OpenFile(const std::string& filename, OpenMode open_mode) {
+  int fd = -1;
+  switch (open_mode) {
+    case READ:
+      fd = open(filename.c_str(), O_RDONLY);
+      break;
+    case READ_WRITE:
+      // TODO(calin) allow the shared uid of the app to access the file.
+      fd = open(filename.c_str(),
+                    O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                    S_IRUSR | S_IWUSR);
+      break;
+  }
+
   if (fd < 0) {
     PLOG(WARNING) << "Failed to open profile file " << filename;
     return -1;
@@ -96,7 +109,6 @@
     PLOG(WARNING) << "Failed to lock profile file " << filename;
     return -1;
   }
-
   return fd;
 }
 
@@ -129,8 +141,8 @@
   } while (length > 0);
 }
 
-static constexpr char kFieldSeparator = ',';
-static constexpr char kLineSeparator = '\n';
+static constexpr const char kFieldSeparator = ',';
+static constexpr const char kLineSeparator = '\n';
 
 /**
  * Serialization format:
@@ -142,7 +154,7 @@
  **/
 bool OfflineProfilingInfo::Serialize(const std::string& filename,
                                      const DexFileToMethodsMap& info) const {
-  int fd = OpenOrCreateFile(filename);
+  int fd = OpenFile(filename, READ_WRITE);
   if (fd == -1) {
     return false;
   }
@@ -168,4 +180,212 @@
 
   return CloseDescriptorForFile(fd, filename);
 }
+
+// TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
+// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
+static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
+  const char* p = s.data();
+  const char* end = p + s.size();
+  // Check if the first character is the separator.
+  if (p != end && *p ==separator) {
+    result->push_back("");
+    ++p;
+  }
+  // Process the rest of the characters.
+  while (p != end) {
+    if (*p == separator) {
+      ++p;
+    } else {
+      const char* start = p;
+      while (++p != end && *p != separator) {
+        // Skip to the next occurrence of the separator.
+      }
+      result->push_back(std::string(start, p - start));
+    }
+  }
+}
+
+bool ProfileCompilationInfo::ProcessLine(const std::string& line,
+                                         const std::vector<const DexFile*>& dex_files) {
+  std::vector<std::string> parts;
+  SplitString(line, kFieldSeparator, &parts);
+  if (parts.size() < 3) {
+    LOG(WARNING) << "Invalid line: " << line;
+    return false;
+  }
+
+  const std::string& multidex_suffix = parts[0];
+  uint32_t checksum;
+  if (!ParseInt(parts[1].c_str(), &checksum)) {
+    return false;
+  }
+
+  const DexFile* current_dex_file = nullptr;
+  for (auto dex_file : dex_files) {
+    if (DexFile::GetMultiDexSuffix(dex_file->GetLocation()) == multidex_suffix) {
+      if (checksum != dex_file->GetLocationChecksum()) {
+        LOG(WARNING) << "Checksum mismatch for "
+            << dex_file->GetLocation() << " when parsing " << filename_;
+        return false;
+      }
+      current_dex_file = dex_file;
+      break;
+    }
+  }
+  if (current_dex_file == nullptr) {
+    return true;
+  }
+
+  for (size_t i = 2; i < parts.size(); i++) {
+    uint32_t method_idx;
+    if (!ParseInt(parts[i].c_str(), &method_idx)) {
+      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
+      return false;
+    }
+    uint16_t class_idx = current_dex_file->GetMethodId(method_idx).class_idx_;
+    auto info_it = info_.find(current_dex_file);
+    if (info_it == info_.end()) {
+      info_it = info_.Put(current_dex_file, ClassToMethodsMap());
+    }
+    ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it == class_map.end()) {
+      class_it = class_map.Put(class_idx, std::set<uint32_t>());
+    }
+    class_it->second.insert(method_idx);
+  }
+  return true;
+}
+
+// Parses the buffer (of length n) starting from start_from and identify new lines
+// based on kLineSeparator marker.
+// Returns the first position after kLineSeparator in the buffer (starting from start_from),
+// or -1 if the marker doesn't appear.
+// The processed characters are appended to the given line.
+static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
+  if (start_from >= n) {
+    return -1;
+  }
+  int new_line_pos = -1;
+  for (int i = start_from; i < n; i++) {
+    if (buffer[i] == kLineSeparator) {
+      new_line_pos = i;
+      break;
+    }
+  }
+  int append_limit = new_line_pos == -1 ? n : new_line_pos;
+  line.append(buffer + start_from, append_limit - start_from);
+  // Jump over kLineSeparator and return the position of the next character.
+  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
+}
+
+bool ProfileCompilationInfo::Load(const std::vector<const DexFile*>& dex_files) {
+  if (dex_files.empty()) {
+    return true;
+  }
+  if (kIsDebugBuild) {
+    // In debug builds verify that the multidex suffixes are unique.
+    std::set<std::string> suffixes;
+    for (auto dex_file : dex_files) {
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(dex_file->GetLocation());
+      DCHECK(suffixes.find(multidex_suffix) == suffixes.end())
+          << "DexFiles appear to belong to different apks."
+          << " There are multiple dex files with the same multidex suffix: "
+          << multidex_suffix;
+      suffixes.insert(multidex_suffix);
+    }
+  }
+  info_.clear();
+
+  int fd = OpenFile(filename_, READ);
+  if (fd == -1) {
+    return false;
+  }
+
+  std::string current_line;
+  const int kBufferSize = 1024;
+  char buffer[kBufferSize];
+  bool success = true;
+
+  while (success) {
+    int n = read(fd, buffer, kBufferSize);
+    if (n < 0) {
+      PLOG(WARNING) << "Error when reading profile file " << filename_;
+      success = false;
+      break;
+    } else if (n == 0) {
+      break;
+    }
+    // Detect the new lines from the buffer. If we manage to complete a line,
+    // process it. Otherwise append to the current line.
+    int current_start_pos = 0;
+    while (current_start_pos < n) {
+      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
+      if (current_start_pos == -1) {
+        break;
+      }
+      if (!ProcessLine(current_line, dex_files)) {
+        success = false;
+        break;
+      }
+      // Reset the current line (we just processed it).
+      current_line.clear();
+    }
+  }
+  if (!success) {
+    info_.clear();
+  }
+  return CloseDescriptorForFile(fd, filename_) && success;
+}
+
+bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
+  auto info_it = info_.find(method_ref.dex_file);
+  if (info_it != info_.end()) {
+    uint16_t class_idx = method_ref.dex_file->GetMethodId(method_ref.dex_method_index).class_idx_;
+    const ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it != class_map.end()) {
+      const std::set<uint32_t>& methods = class_it->second;
+      return methods.find(method_ref.dex_method_index) != methods.end();
+    }
+    return false;
+  }
+  return false;
+}
+
+std::string ProfileCompilationInfo::DumpInfo(bool print_full_dex_location) const {
+  std::ostringstream os;
+  if (info_.empty()) {
+    return "ProfileInfo: empty";
+  }
+
+  os << "ProfileInfo:";
+
+  // Use an additional map to achieve a predefined order based on the dex locations.
+  SafeMap<const std::string, const DexFile*> dex_locations_map;
+  for (auto info_it : info_) {
+    dex_locations_map.Put(info_it.first->GetLocation(), info_it.first);
+  }
+
+  const std::string kFirstDexFileKeySubstitute = ":classes.dex";
+  for (auto dex_file_it : dex_locations_map) {
+    os << "\n";
+    const std::string& location = dex_file_it.first;
+    const DexFile* dex_file = dex_file_it.second;
+    if (print_full_dex_location) {
+      os << location;
+    } else {
+      // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
+    }
+    for (auto class_it : info_.find(dex_file)->second) {
+      for (auto method_it : class_it.second) {
+        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+      }
+    }
+  }
+  return os.str();
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index e3117eb..90bda60 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -21,6 +21,7 @@
 
 #include "atomic.h"
 #include "dex_file.h"
+#include "method_reference.h"
 #include "safe_map.h"
 
 namespace art {
@@ -50,10 +51,47 @@
   bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
 
   // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
-  // singe thread?)
+  // single thread?)
   Atomic<uint64_t> last_update_time_ns_;
 };
 
+/**
+ * Profile information in a format suitable to be queried by the compiler and performing
+ * profile guided compilation.
+ */
+class ProfileCompilationInfo {
+ public:
+  // Constructs a ProfileCompilationInfo backed by the provided file.
+  explicit ProfileCompilationInfo(const std::string& filename) : filename_(filename) {}
+
+  // Loads profile information corresponding to the provided dex files.
+  // The dex files' multidex suffixes must be unique.
+  // This resets the state of the profiling information
+  // (i.e. all previously loaded info are cleared).
+  bool Load(const std::vector<const DexFile*>& dex_files);
+
+  // Returns true if the method reference is present in the profiling info.
+  bool ContainsMethod(const MethodReference& method_ref) const;
+
+  const std::string& GetFilename() const { return filename_; }
+
+  // Dumps all the loaded profile info into a string and returns it.
+  // This is intended for testing and debugging.
+  std::string DumpInfo(bool print_full_dex_location = true) const;
+
+ private:
+  bool ProcessLine(const std::string& line,
+                   const std::vector<const DexFile*>& dex_files);
+
+  using ClassToMethodsMap = SafeMap<uint32_t, std::set<uint32_t>>;
+  // Map identifying the location of the profiled methods.
+  // dex_file -> class_index -> [dex_method_index]+
+  using DexFileToProfileInfoMap = SafeMap<const DexFile*, ClassToMethodsMap>;
+
+  const std::string filename_;
+  DexFileToProfileInfoMap info_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 856a3e7..4a1e6c2 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -52,10 +52,10 @@
   exit(status);
 }
 
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) {
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
 #ifdef __ANDROID__
-  if (javaLdLibraryPathJstr != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr);
+  if (javaLdLibraryPath != nullptr) {
+    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
     if (ldLibraryPath.c_str() != nullptr) {
       android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
     }
@@ -63,23 +63,31 @@
 
 #else
   LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPathJstr, env);
+  UNUSED(javaLdLibraryPath, env);
 #endif
 }
 
 static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPathJstr) {
+                                  jstring javaLdLibraryPath, jstring javaIsolationPath) {
   ScopedUtfChars filename(env, javaFilename);
   if (filename.c_str() == nullptr) {
     return nullptr;
   }
 
-  SetLdLibraryPath(env, javaLdLibraryPathJstr);
+  int32_t target_sdk_version = Runtime::Current()->GetTargetSdkVersion();
+
+  // Starting with N nativeLoad uses classloader local
+  // linker namespace instead of global LD_LIBRARY_PATH
+  // (23 is Marshmallow)
+  if (target_sdk_version <= INT_MAX) {
+    SetLdLibraryPath(env, javaLdLibraryPath);
+  }
 
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader, &error_msg);
+    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader,
+                                         javaLdLibraryPath, javaIsolationPath, &error_msg);
     if (success) {
       return nullptr;
     }
@@ -107,7 +115,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
 
diff --git a/runtime/noop_compiler_callbacks.h b/runtime/noop_compiler_callbacks.h
index 1cbf2bb..02081cb 100644
--- a/runtime/noop_compiler_callbacks.h
+++ b/runtime/noop_compiler_callbacks.h
@@ -26,8 +26,7 @@
   NoopCompilerCallbacks() : CompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp) {}
   ~NoopCompilerCallbacks() {}
 
-  bool MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
-    return true;
+  void MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
   }
 
   void ClassRejected(ClassReference ref ATTRIBUTE_UNUSED) OVERRIDE {}
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 40aca0d..c787b9a 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -45,9 +45,7 @@
 
 OatHeader* OatHeader::Create(InstructionSet instruction_set,
                              const InstructionSetFeatures* instruction_set_features,
-                             const std::vector<const DexFile*>* dex_files,
-                             uint32_t image_file_location_oat_checksum,
-                             uint32_t image_file_location_oat_data_begin,
+                             uint32_t dex_file_count,
                              const SafeMap<std::string, std::string>* variable_data) {
   // Estimate size of optional data.
   size_t needed_size = ComputeOatHeaderSize(variable_data);
@@ -58,18 +56,29 @@
   // Create the OatHeader in-place.
   return new (memory) OatHeader(instruction_set,
                                 instruction_set_features,
-                                dex_files,
-                                image_file_location_oat_checksum,
-                                image_file_location_oat_data_begin,
+                                dex_file_count,
                                 variable_data);
 }
 
 OatHeader::OatHeader(InstructionSet instruction_set,
                      const InstructionSetFeatures* instruction_set_features,
-                     const std::vector<const DexFile*>* dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uint32_t image_file_location_oat_data_begin,
-                     const SafeMap<std::string, std::string>* variable_data) {
+                     uint32_t dex_file_count,
+                     const SafeMap<std::string, std::string>* variable_data)
+    : adler32_checksum_(adler32(0L, Z_NULL, 0)),
+      instruction_set_(instruction_set),
+      instruction_set_features_bitmap_(instruction_set_features->AsBitmap()),
+      dex_file_count_(dex_file_count),
+      executable_offset_(0),
+      interpreter_to_interpreter_bridge_offset_(0),
+      interpreter_to_compiled_code_bridge_offset_(0),
+      jni_dlsym_lookup_offset_(0),
+      quick_generic_jni_trampoline_offset_(0),
+      quick_imt_conflict_trampoline_offset_(0),
+      quick_resolution_trampoline_offset_(0),
+      quick_to_interpreter_bridge_offset_(0),
+      image_patch_delta_(0),
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0) {
   // Don't want asserts in header as they would be checked in each file that includes it. But the
   // fields are private, so we check inside a method.
   static_assert(sizeof(magic_) == sizeof(kOatMagic),
@@ -79,46 +88,11 @@
 
   memcpy(magic_, kOatMagic, sizeof(kOatMagic));
   memcpy(version_, kOatVersion, sizeof(kOatVersion));
-  executable_offset_ = 0;
-  image_patch_delta_ = 0;
-
-  adler32_checksum_ = adler32(0L, Z_NULL, 0);
 
   CHECK_NE(instruction_set, kNone);
-  instruction_set_ = instruction_set;
-  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
-
-  instruction_set_features_bitmap_ = instruction_set_features->AsBitmap();
-  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
-
-  dex_file_count_ = dex_files->size();
-  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
-
-  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
-  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
-
-  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
-  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
-  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
 
   // Flatten the map. Will also update variable_size_data_size_.
   Flatten(variable_data);
-
-  // Update checksum for variable data size.
-  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
-
-  // Update for data, if existing.
-  if (key_value_store_size_ > 0U) {
-    UpdateChecksum(&key_value_store_, key_value_store_size_);
-  }
-
-  interpreter_to_interpreter_bridge_offset_ = 0;
-  interpreter_to_compiled_code_bridge_offset_ = 0;
-  jni_dlsym_lookup_offset_ = 0;
-  quick_generic_jni_trampoline_offset_ = 0;
-  quick_imt_conflict_trampoline_offset_ = 0;
-  quick_resolution_trampoline_offset_ = 0;
-  quick_to_interpreter_bridge_offset_ = 0;
 }
 
 bool OatHeader::IsValid() const {
@@ -175,6 +149,37 @@
   return adler32_checksum_;
 }
 
+void OatHeader::UpdateChecksumWithHeaderData() {
+  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
+  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
+  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
+  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
+  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
+
+  // Update checksum for variable data size.
+  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
+
+  // Update for data, if existing.
+  if (key_value_store_size_ > 0U) {
+    UpdateChecksum(&key_value_store_, key_value_store_size_);
+  }
+
+  UpdateChecksum(&executable_offset_, sizeof(executable_offset_));
+  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_,
+                 sizeof(interpreter_to_interpreter_bridge_offset_));
+  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_,
+                 sizeof(interpreter_to_compiled_code_bridge_offset_));
+  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(jni_dlsym_lookup_offset_));
+  UpdateChecksum(&quick_generic_jni_trampoline_offset_,
+                 sizeof(quick_generic_jni_trampoline_offset_));
+  UpdateChecksum(&quick_imt_conflict_trampoline_offset_,
+                 sizeof(quick_imt_conflict_trampoline_offset_));
+  UpdateChecksum(&quick_resolution_trampoline_offset_,
+                 sizeof(quick_resolution_trampoline_offset_));
+  UpdateChecksum(&quick_to_interpreter_bridge_offset_,
+                 sizeof(quick_to_interpreter_bridge_offset_));
+}
+
 void OatHeader::UpdateChecksum(const void* data, size_t length) {
   DCHECK(IsValid());
   const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
@@ -205,7 +210,6 @@
   DCHECK_EQ(executable_offset_, 0U);
 
   executable_offset_ = executable_offset;
-  UpdateChecksum(&executable_offset_, sizeof(executable_offset));
 }
 
 const void* OatHeader::GetInterpreterToInterpreterBridge() const {
@@ -225,7 +229,6 @@
   DCHECK_EQ(interpreter_to_interpreter_bridge_offset_, 0U) << offset;
 
   interpreter_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetInterpreterToCompiledCodeBridge() const {
@@ -244,7 +247,6 @@
   DCHECK_EQ(interpreter_to_compiled_code_bridge_offset_, 0U) << offset;
 
   interpreter_to_compiled_code_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetJniDlsymLookup() const {
@@ -263,7 +265,6 @@
   DCHECK_EQ(jni_dlsym_lookup_offset_, 0U) << offset;
 
   jni_dlsym_lookup_offset_ = offset;
-  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickGenericJniTrampoline() const {
@@ -282,7 +283,6 @@
   DCHECK_EQ(quick_generic_jni_trampoline_offset_, 0U) << offset;
 
   quick_generic_jni_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_generic_jni_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickImtConflictTrampoline() const {
@@ -301,7 +301,6 @@
   DCHECK_EQ(quick_imt_conflict_trampoline_offset_, 0U) << offset;
 
   quick_imt_conflict_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_imt_conflict_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickResolutionTrampoline() const {
@@ -320,7 +319,6 @@
   DCHECK_EQ(quick_resolution_trampoline_offset_, 0U) << offset;
 
   quick_resolution_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_resolution_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickToInterpreterBridge() const {
@@ -339,7 +337,6 @@
   DCHECK_EQ(quick_to_interpreter_bridge_offset_, 0U) << offset;
 
   quick_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&quick_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 int32_t OatHeader::GetImagePatchDelta() const {
@@ -367,11 +364,22 @@
   return image_file_location_oat_checksum_;
 }
 
+void OatHeader::SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum) {
+  CHECK(IsValid());
+  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
+}
+
 uint32_t OatHeader::GetImageFileLocationOatDataBegin() const {
   CHECK(IsValid());
   return image_file_location_oat_data_begin_;
 }
 
+void OatHeader::SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin) {
+  CHECK(IsValid());
+  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
+  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
+}
+
 uint32_t OatHeader::GetKeyValueStoreSize() const {
   CHECK(IsValid());
   return key_value_store_size_;
diff --git a/runtime/oat.h b/runtime/oat.h
index 5b780c3..5ed1977 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '3', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
@@ -45,15 +45,14 @@
 
   static OatHeader* Create(InstructionSet instruction_set,
                            const InstructionSetFeatures* instruction_set_features,
-                           const std::vector<const DexFile*>* dex_files,
-                           uint32_t image_file_location_oat_checksum,
-                           uint32_t image_file_location_oat_data_begin,
+                           uint32_t dex_file_count,
                            const SafeMap<std::string, std::string>* variable_data);
 
   bool IsValid() const;
   std::string GetValidationErrorMessage() const;
   const char* GetMagic() const;
   uint32_t GetChecksum() const;
+  void UpdateChecksumWithHeaderData();
   void UpdateChecksum(const void* data, size_t length);
   uint32_t GetDexFileCount() const {
     DCHECK(IsValid());
@@ -92,8 +91,11 @@
 
   InstructionSet GetInstructionSet() const;
   uint32_t GetInstructionSetFeaturesBitmap() const;
+
   uint32_t GetImageFileLocationOatChecksum() const;
+  void SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum);
   uint32_t GetImageFileLocationOatDataBegin() const;
+  void SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin);
 
   uint32_t GetKeyValueStoreSize() const;
   const uint8_t* GetKeyValueStore() const;
@@ -107,9 +109,7 @@
  private:
   OatHeader(InstructionSet instruction_set,
             const InstructionSetFeatures* instruction_set_features,
-            const std::vector<const DexFile*>* dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_data_begin,
+            uint32_t dex_file_count,
             const SafeMap<std::string, std::string>* variable_data);
 
   // Returns true if the value of the given key is "true", false otherwise.
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 837662d..6cea902 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -51,6 +51,23 @@
   kIntrinsicMinMaxLong,
   kIntrinsicMinMaxFloat,
   kIntrinsicMinMaxDouble,
+  kIntrinsicCos,
+  kIntrinsicSin,
+  kIntrinsicAcos,
+  kIntrinsicAsin,
+  kIntrinsicAtan,
+  kIntrinsicAtan2,
+  kIntrinsicCbrt,
+  kIntrinsicCosh,
+  kIntrinsicExp,
+  kIntrinsicExpm1,
+  kIntrinsicHypot,
+  kIntrinsicLog,
+  kIntrinsicLog10,
+  kIntrinsicNextAfter,
+  kIntrinsicSinh,
+  kIntrinsicTan,
+  kIntrinsicTanh,
   kIntrinsicSqrt,
   kIntrinsicCeil,
   kIntrinsicFloor,
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 7de6c06..ea193d7 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -33,7 +33,7 @@
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero)
+    // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
     // is used to create artificial data dependency from the is_gray
     // load to the ref field (ptr) load to avoid needing a load-load
     // barrier between the two.
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index e7ad731..600b7f9 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -82,7 +82,8 @@
   static void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static mirror::Object* Mark(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE static mirror::Object* Mark(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   static mirror::Object* WhitePtr() {
     return reinterpret_cast<mirror::Object*>(white_ptr_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index dedc110..93ca347 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -219,8 +219,6 @@
     UnloadNativeBridge();
   }
 
-  MaybeSaveJitProfilingInfo();
-
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -1220,9 +1218,9 @@
   // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
   // the library that implements System.loadLibrary!
   {
-    std::string reason;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, &reason)) {
-      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << reason;
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
     }
   }
 
@@ -1293,6 +1291,11 @@
 }
 
 void Runtime::DumpForSigQuit(std::ostream& os) {
+  // Dumping for SIGQIT may cause deadlocks if the the debugger is active. b/26118154
+  if (Dbg::IsDebuggerActive()) {
+    LOG(INFO) << "Skipping DumpForSigQuit due to active debugger";
+    return;
+  }
   GetClassLinker()->DumpForSigQuit(os);
   GetInternTable()->DumpForSigQuit(os);
   GetJavaVM()->DumpForSigQuit(os);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 90539b4..13e3774 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2478,6 +2478,23 @@
   QUICK_ENTRY_POINT_INFO(pCmpgFloat)
   QUICK_ENTRY_POINT_INFO(pCmplDouble)
   QUICK_ENTRY_POINT_INFO(pCmplFloat)
+  QUICK_ENTRY_POINT_INFO(pCos)
+  QUICK_ENTRY_POINT_INFO(pSin)
+  QUICK_ENTRY_POINT_INFO(pAcos)
+  QUICK_ENTRY_POINT_INFO(pAsin)
+  QUICK_ENTRY_POINT_INFO(pAtan)
+  QUICK_ENTRY_POINT_INFO(pAtan2)
+  QUICK_ENTRY_POINT_INFO(pCbrt)
+  QUICK_ENTRY_POINT_INFO(pCosh)
+  QUICK_ENTRY_POINT_INFO(pExp)
+  QUICK_ENTRY_POINT_INFO(pExpm1)
+  QUICK_ENTRY_POINT_INFO(pHypot)
+  QUICK_ENTRY_POINT_INFO(pLog)
+  QUICK_ENTRY_POINT_INFO(pLog10)
+  QUICK_ENTRY_POINT_INFO(pNextAfter)
+  QUICK_ENTRY_POINT_INFO(pSinh)
+  QUICK_ENTRY_POINT_INFO(pTan)
+  QUICK_ENTRY_POINT_INFO(pTanh)
   QUICK_ENTRY_POINT_INFO(pFmod)
   QUICK_ENTRY_POINT_INFO(pL2d)
   QUICK_ENTRY_POINT_INFO(pFmodf)
@@ -2531,6 +2548,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMark)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
diff --git a/runtime/thread.h b/runtime/thread.h
index c556c36..6cb895c 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -545,6 +545,13 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, state_and_flags));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> IsGcMarkingOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
+  }
+
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index cf27ff2..d75587b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -116,6 +116,7 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
                                                         mirror::Class* klass,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
                                                         bool log_hard_failures,
                                                         std::string* error) {
@@ -140,9 +141,9 @@
   }
   if (early_failure) {
     *error = "Verifier rejected class " + PrettyDescriptor(klass) + failure_message;
-    if (Runtime::Current()->IsAotCompiler()) {
+    if (callbacks != nullptr) {
       ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
-      Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
+      callbacks->ClassRejected(ref);
     }
     return kHardFailure;
   }
@@ -154,6 +155,7 @@
                      dex_cache,
                      class_loader,
                      class_def,
+                     callbacks,
                      allow_soft_failures,
                      log_hard_failures,
                      error);
@@ -172,6 +174,7 @@
                                    ClassDataItemIterator* it,
                                    Handle<mirror::DexCache> dex_cache,
                                    Handle<mirror::ClassLoader> class_loader,
+                                   CompilerCallbacks* callbacks,
                                    bool allow_soft_failures,
                                    bool log_hard_failures,
                                    bool need_precise_constants,
@@ -212,6 +215,7 @@
                                                       it->GetMethodCodeItem(),
                                                       method,
                                                       it->GetMethodAccessFlags(),
+                                                      callbacks,
                                                       allow_soft_failures,
                                                       log_hard_failures,
                                                       need_precise_constants,
@@ -241,6 +245,7 @@
                                                         Handle<mirror::DexCache> dex_cache,
                                                         Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
                                                         bool log_hard_failures,
                                                         std::string* error) {
@@ -274,6 +279,7 @@
                       &it,
                       dex_cache,
                       class_loader,
+                      callbacks,
                       allow_soft_failures,
                       log_hard_failures,
                       false /* need precise constants */,
@@ -288,6 +294,7 @@
                       &it,
                       dex_cache,
                       class_loader,
+                      callbacks,
                       allow_soft_failures,
                       log_hard_failures,
                       false /* need precise constants */,
@@ -322,6 +329,7 @@
                                                          const DexFile::CodeItem* code_item,
                                                          ArtMethod* method,
                                                          uint32_t method_access_flags,
+                                                         CompilerCallbacks* callbacks,
                                                          bool allow_soft_failures,
                                                          bool log_hard_failures,
                                                          bool need_precise_constants,
@@ -336,6 +344,12 @@
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
     CHECK(!verifier.have_pending_hard_failure_);
+
+    if (code_item != nullptr && callbacks != nullptr) {
+      // Let the interested party know that the method was verified.
+      callbacks->MethodVerified(&verifier);
+    }
+
     if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
         verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
@@ -363,8 +377,14 @@
             verifier.failure_messages_[verifier.failure_messages_.size() - 1]->str();
       }
       result = kHardFailure;
+
+      if (callbacks != nullptr) {
+        // Let the interested party know that we failed the class.
+        ClassReference ref(dex_file, dex_file->GetIndexForClassDef(*class_def));
+        callbacks->ClassRejected(ref);
+      }
     }
-    if (kDebugVerify) {
+    if (VLOG_IS_ON(verifier)) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
@@ -408,13 +428,18 @@
 }
 
 MethodVerifier::MethodVerifier(Thread* self,
-                               const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
+                               const DexFile* dex_file,
+                               Handle<mirror::DexCache> dex_cache,
                                Handle<mirror::ClassLoader> class_loader,
                                const DexFile::ClassDef* class_def,
-                               const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
-                               ArtMethod* method, uint32_t method_access_flags,
-                               bool can_load_classes, bool allow_soft_failures,
-                               bool need_precise_constants, bool verify_to_dump,
+                               const DexFile::CodeItem* code_item,
+                               uint32_t dex_method_idx,
+                               ArtMethod* method,
+                               uint32_t method_access_flags,
+                               bool can_load_classes,
+                               bool allow_soft_failures,
+                               bool need_precise_constants,
+                               bool verify_to_dump,
                                bool allow_thread_suspension)
     : self_(self),
       arena_stack_(Runtime::Current()->GetArenaPool()),
@@ -739,10 +764,7 @@
   result = result && VerifyInstructions();
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
-  // Compute information for compiler.
-  if (result && runtime->IsCompiler()) {
-    result = runtime->GetCompilerCallbacks()->MethodVerified(this);
-  }
+
   return result;
 }
 
@@ -802,10 +824,6 @@
       // Hard verification failures at compile time will still fail at runtime, so the class is
       // marked as rejected to prevent it from being compiled.
     case VERIFY_ERROR_BAD_CLASS_HARD: {
-      if (Runtime::Current()->IsAotCompiler()) {
-        ClassReference ref(dex_file_, dex_file_->GetIndexForClassDef(*class_def_));
-        Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
-      }
       have_pending_hard_failure_ = true;
       if (VLOG_IS_ON(verifier) && kDumpRegLinesOnHardFailureIfVLOG) {
         ScopedObjectAccess soa(Thread::Current());
@@ -3639,30 +3657,8 @@
   auto* cl = Runtime::Current()->GetClassLinker();
   auto pointer_size = cl->GetImagePointerSize();
 
-  // Check that interface methods are static or match interface classes.
-  // We only allow statics if we don't have default methods enabled.
-  if (klass->IsInterface()) {
-    Runtime* runtime = Runtime::Current();
-    const bool default_methods_supported =
-        runtime == nullptr ||
-        runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
-    if (method_type != METHOD_INTERFACE &&
-        (!default_methods_supported || method_type != METHOD_STATIC)) {
-      Fail(VERIFY_ERROR_CLASS_CHANGE)
-          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
-          << " is in an interface class " << PrettyClass(klass);
-      return nullptr;
-    }
-  } else {
-    if (method_type == METHOD_INTERFACE) {
-      Fail(VERIFY_ERROR_CLASS_CHANGE)
-          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
-          << " is in a non-interface class " << PrettyClass(klass);
-      return nullptr;
-    }
-  }
-
   ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx, pointer_size);
+  bool stash_method = false;
   if (res_method == nullptr) {
     const char* name = dex_file_->GetMethodName(method_id);
     const Signature signature = dex_file_->GetMethodSignature(method_id);
@@ -3675,7 +3671,7 @@
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
-      dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+      stash_method = true;
     } else {
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
@@ -3704,6 +3700,38 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
+
+  // Check that interface methods are static or match interface classes.
+  // We only allow statics if we don't have default methods enabled.
+  //
+  // Note: this check must be after the initializer check, as those are required to fail a class,
+  //       while this check implies an IncompatibleClassChangeError.
+  if (klass->IsInterface()) {
+    Runtime* runtime = Runtime::Current();
+    const bool default_methods_supported =
+        runtime == nullptr ||
+        runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+    if (method_type != METHOD_INTERFACE &&
+        (!default_methods_supported || method_type != METHOD_STATIC)) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in an interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  } else {
+    if (method_type == METHOD_INTERFACE) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in a non-interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  }
+
+  // Only stash after the above passed. Otherwise the method wasn't guaranteed to be correct.
+  if (stash_method) {
+    dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+  }
+
   // Check if access is allowed.
   if (!referrer.CanAccessMember(res_method->GetDeclaringClass(), res_method->GetAccessFlags())) {
     Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call " << PrettyMethod(res_method)
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 719f0d7..79db576 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,6 +33,7 @@
 
 namespace art {
 
+class CompilerCallbacks;
 class Instruction;
 struct ReferenceMap2Visitor;
 class Thread;
@@ -141,6 +142,7 @@
   /* Verify a class. Returns "kNoFailure" on success. */
   static FailureKind VerifyClass(Thread* self,
                                  mirror::Class* klass,
+                                 CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
                                  bool log_hard_failures,
                                  std::string* error)
@@ -150,6 +152,7 @@
                                  Handle<mirror::DexCache> dex_cache,
                                  Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
+                                 CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
                                  bool log_hard_failures,
                                  std::string* error)
@@ -216,16 +219,34 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
+  MethodVerifier(Thread* self,
+                 const DexFile* dex_file,
+                 Handle<mirror::DexCache> dex_cache,
+                 Handle<mirror::ClassLoader> class_loader,
+                 const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item,
+                 uint32_t method_idx,
                  ArtMethod* method,
-                 uint32_t access_flags, bool can_load_classes, bool allow_soft_failures,
-                 bool need_precise_constants, bool allow_thread_suspension)
+                 uint32_t access_flags,
+                 bool can_load_classes,
+                 bool allow_soft_failures,
+                 bool need_precise_constants,
+                 bool allow_thread_suspension)
           SHARED_REQUIRES(Locks::mutator_lock_)
-      : MethodVerifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx,
-                       method, access_flags, can_load_classes, allow_soft_failures,
-                       need_precise_constants, false, allow_thread_suspension) {}
+      : MethodVerifier(self,
+                       dex_file,
+                       dex_cache,
+                       class_loader,
+                       class_def,
+                       code_item,
+                       method_idx,
+                       method,
+                       access_flags,
+                       can_load_classes,
+                       allow_soft_failures,
+                       need_precise_constants,
+                       false,
+                       allow_thread_suspension) {}
 
   ~MethodVerifier();
 
@@ -299,12 +320,20 @@
   }
 
   // Private constructor for dumping.
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
-                 ArtMethod* method, uint32_t access_flags,
-                 bool can_load_classes, bool allow_soft_failures, bool need_precise_constants,
-                 bool verify_to_dump, bool allow_thread_suspension)
+  MethodVerifier(Thread* self,
+                 const DexFile* dex_file,
+                 Handle<mirror::DexCache> dex_cache,
+                 Handle<mirror::ClassLoader> class_loader,
+                 const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item,
+                 uint32_t method_idx,
+                 ArtMethod* method,
+                 uint32_t access_flags,
+                 bool can_load_classes,
+                 bool allow_soft_failures,
+                 bool need_precise_constants,
+                 bool verify_to_dump,
+                 bool allow_thread_suspension)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Adds the given string to the beginning of the last failure message.
@@ -323,6 +352,7 @@
                             ClassDataItemIterator* it,
                             Handle<mirror::DexCache> dex_cache,
                             Handle<mirror::ClassLoader> class_loader,
+                            CompilerCallbacks* callbacks,
                             bool allow_soft_failures,
                             bool log_hard_failures,
                             bool need_precise_constants,
@@ -350,6 +380,7 @@
                                   const DexFile::CodeItem* code_item,
                                   ArtMethod* method,
                                   uint32_t method_access_flags,
+                                  CompilerCallbacks* callbacks,
                                   bool allow_soft_failures,
                                   bool log_hard_failures,
                                   bool need_precise_constants,
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index c4123d5..946f842 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -37,8 +37,8 @@
 
     // Verify the class
     std::string error_msg;
-    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, true, true, &error_msg) == MethodVerifier::kNoFailure)
-        << error_msg;
+    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, nullptr, true, true, &error_msg)
+                    == MethodVerifier::kNoFailure) << error_msg;
   }
 
   void VerifyDexFile(const DexFile& dex)
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e2c3afb..2b778d9 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -379,7 +379,10 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad =
+      CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad",
+                  "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)"
+                      "Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index e9946c8..b70ca4f 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -267,11 +267,20 @@
                                          const char* app_code_cache_dir,
                                          const char* isa ATTRIBUTE_UNUSED) {
   struct stat st;
-  if ((app_code_cache_dir != nullptr)
-      && (stat(app_code_cache_dir, &st) == 0)
-      && S_ISDIR(st.st_mode)) {
-    printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+  if (app_code_cache_dir != nullptr) {
+    if (stat(app_code_cache_dir, &st) == 0) {
+      if (S_ISDIR(st.st_mode)) {
+        printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+      } else {
+        printf("Code cache is not a directory.\n");
+      }
+    } else {
+      perror("Error when stat-ing the code_cache:");
+    }
+  } else {
+    printf("app_code_cache_dir is null.\n");
   }
+
   if (art_cbs != nullptr) {
     gNativeBridgeArtCallbacks = art_cbs;
     printf("Native bridge initialized.\n");
diff --git a/test/123-inline-execute2/expected.txt b/test/123-inline-execute2/expected.txt
new file mode 100644
index 0000000..aa74fa3
--- /dev/null
+++ b/test/123-inline-execute2/expected.txt
@@ -0,0 +1,299 @@
+Math.sin(0.0) = 0.000000000000
+Math.sinh(0.0) = 0.000000000000
+Math.asin(0.0) = 0.000000000000
+Math.cos(0.0) = 1.000000000000
+Math.cosh(0.0) = 1.000000000000
+Math.acos(0.0) = 1.570796326795
+Math.tan(0.0) = 0.000000000000
+Math.tanh(0.0) = 0.000000000000
+Math.atan(0.0) = 0.000000000000
+Math.atan2(0.0, 1.0) = 0.000000000000
+Math.sin(0.7853981633974483) = 0.707106781187
+Math.sinh(0.7853981633974483) = 0.868670961486
+Math.asin(0.7853981633974483) = 0.903339110767
+Math.cos(0.7853981633974483) = 0.707106781187
+Math.cosh(0.7853981633974483) = 1.324609089252
+Math.acos(0.7853981633974483) = 0.667457216028
+Math.tan(0.7853981633974483) = 1.000000000000
+Math.tanh(0.7853981633974483) = 0.655794202633
+Math.atan(0.7853981633974483) = 0.665773750028
+Math.atan2(0.7853981633974483, 1.7853981633974483) = 0.414423800577
+Math.sin(1.5707963267948966) = 1.000000000000
+Math.sinh(1.5707963267948966) = 2.301298902307
+Math.asin(1.5707963267948966) = NaN
+Math.cos(1.5707963267948966) = 0.000000000000
+Math.cosh(1.5707963267948966) = 2.509178478658
+Math.acos(1.5707963267948966) = NaN
+Math.tanh(1.5707963267948966) = 0.917152335667
+Math.atan(1.5707963267948966) = 1.003884821854
+Math.atan2(1.5707963267948966, 2.5707963267948966) = 0.548479764417
+Math.sin(2.356194490192345) = 0.707106781187
+Math.sinh(2.356194490192345) = 5.227971924678
+Math.asin(2.356194490192345) = NaN
+Math.cos(2.356194490192345) = -0.707106781187
+Math.cosh(2.356194490192345) = 5.322752149520
+Math.acos(2.356194490192345) = NaN
+Math.tan(2.356194490192345) = -1.000000000000
+Math.tanh(2.356194490192345) = 0.982193380007
+Math.atan(2.356194490192345) = 1.169422824816
+Math.atan2(2.356194490192345, 3.356194490192345) = 0.612096117380
+Math.sin(3.141592653589793) = 0.000000000000
+Math.sinh(3.141592653589793) = 11.548739357258
+Math.asin(3.141592653589793) = NaN
+Math.cos(3.141592653589793) = -1.000000000000
+Math.cosh(3.141592653589793) = 11.591953275522
+Math.acos(3.141592653589793) = NaN
+Math.tan(3.141592653589793) = -0.000000000000
+Math.tanh(3.141592653589793) = 0.996272076221
+Math.atan(3.141592653589793) = 1.262627255679
+Math.atan2(3.141592653589793, 4.141592653589793) = 0.648948780815
+Math.sin(3.9269908169872414) = -0.707106781187
+Math.sinh(3.9269908169872414) = 25.367158319374
+Math.asin(3.9269908169872414) = NaN
+Math.cos(3.9269908169872414) = -0.707106781187
+Math.cosh(3.9269908169872414) = 25.386861192361
+Math.acos(3.9269908169872414) = NaN
+Math.tan(3.9269908169872414) = 1.000000000000
+Math.tanh(3.9269908169872414) = 0.999223894879
+Math.atan(3.9269908169872414) = 1.321447967784
+Math.atan2(3.9269908169872414, 4.926990816987241) = 0.672931229191
+Math.sin(4.71238898038469) = -1.000000000000
+Math.sinh(4.71238898038469) = 55.654397599418
+Math.asin(4.71238898038469) = NaN
+Math.cos(4.71238898038469) = -0.000000000000
+Math.cosh(4.71238898038469) = 55.663380890439
+Math.acos(4.71238898038469) = NaN
+Math.tanh(4.71238898038469) = 0.999838613989
+Math.atan(4.71238898038469) = 1.361691682971
+Math.atan2(4.71238898038469, 5.71238898038469) = 0.689765469251
+Math.sin(5.497787143782138) = -0.707106781187
+Math.sinh(5.497787143782138) = 122.073483514693
+Math.asin(5.497787143782138) = NaN
+Math.cos(5.497787143782138) = 0.707106781187
+Math.cosh(5.497787143782138) = 122.077579339582
+Math.acos(5.497787143782138) = NaN
+Math.tan(5.497787143782138) = -1.000000000000
+Math.tanh(5.497787143782138) = 0.999966449000
+Math.atan(5.497787143782138) = 1.390871988014
+Math.atan2(5.497787143782138, 6.497787143782138) = 0.702226398171
+Math.sin(6.283185307179586) = -0.000000000000
+Math.sinh(6.283185307179586) = 267.744894041016
+Math.asin(6.283185307179586) = NaN
+Math.cos(6.283185307179586) = 1.000000000000
+Math.cosh(6.283185307179586) = 267.746761483748
+Math.acos(6.283185307179586) = NaN
+Math.tan(6.283185307179586) = -0.000000000000
+Math.tanh(6.283185307179586) = 0.999993025340
+Math.atan(6.283185307179586) = 1.412965136507
+Math.atan2(6.283185307179586, 7.283185307179586) = 0.711819549590
+Math.cbrt(-3.0) = -1.442249570307
+Math.log(-3.0) = NaN
+Math.log10(-3.0) = NaN
+Math.log1p(-3.0) = NaN
+Math.exp(-3.0) = 0.049787068368
+Math.expm1(-3.0) = -0.950212931632
+Math.pow(-3.0, -2.0) = 0.111111111111
+Math.hypot(-3.0, -2.0) = 3.605551275464
+Math.cbrt(-2.0) = -1.259921049895
+Math.log(-2.0) = NaN
+Math.log10(-2.0) = NaN
+Math.log1p(-2.0) = NaN
+Math.exp(-2.0) = 0.135335283237
+Math.expm1(-2.0) = -0.864664716763
+Math.pow(-2.0, -1.0) = -0.500000000000
+Math.hypot(-2.0, -1.0) = 2.236067977500
+Math.cbrt(-1.0) = -1.000000000000
+Math.log(-1.0) = NaN
+Math.log10(-1.0) = NaN
+Math.log1p(-1.0) = -Infinity
+Math.exp(-1.0) = 0.367879441171
+Math.expm1(-1.0) = -0.632120558829
+Math.pow(-1.0, 0.0) = 1.000000000000
+Math.hypot(-1.0, 0.0) = 1.000000000000
+Math.cbrt(0.0) = 0.000000000000
+Math.log(0.0) = -Infinity
+Math.log10(0.0) = -Infinity
+Math.log1p(0.0) = 0.000000000000
+Math.exp(0.0) = 1.000000000000
+Math.expm1(0.0) = 0.000000000000
+Math.pow(0.0, 1.0) = 0.000000000000
+Math.hypot(0.0, 1.0) = 1.000000000000
+Math.cbrt(1.0) = 1.000000000000
+Math.log(1.0) = 0.000000000000
+Math.log10(1.0) = 0.000000000000
+Math.log1p(1.0) = 0.693147180560
+Math.exp(1.0) = 2.718281828459
+Math.expm1(1.0) = 1.718281828459
+Math.pow(1.0, 2.0) = 1.000000000000
+Math.hypot(1.0, 2.0) = 2.236067977500
+Math.cbrt(2.0) = 1.259921049895
+Math.log(2.0) = 0.693147180560
+Math.log10(2.0) = 0.301029995664
+Math.log1p(2.0) = 1.098612288668
+Math.exp(2.0) = 7.389056098931
+Math.expm1(2.0) = 6.389056098931
+Math.pow(2.0, 3.0) = 8.000000000000
+Math.hypot(2.0, 3.0) = 3.605551275464
+Math.cbrt(3.0) = 1.442249570307
+Math.log(3.0) = 1.098612288668
+Math.log10(3.0) = 0.477121254720
+Math.log1p(3.0) = 1.386294361120
+Math.exp(3.0) = 20.085536923188
+Math.expm1(3.0) = 19.085536923188
+Math.pow(3.0, 4.0) = 81.000000000000
+Math.hypot(3.0, 4.0) = 5.000000000000
+Math.ceil(0.0001) = 1.000000000000
+Math.floor(0.0001) = 0.000000000000
+Math.nextAfter(1.0, 2.0) = 1.000000000000
+Math.nextAfter(2.0, 1.0) = 2.000000000000
+Math.rint(0.5000001) = 1.000000000000
+StrictMath.sin(0.0) = 0.0
+StrictMath.sinh(0.0) = 0.0
+StrictMath.asin(0.0) = 0.0
+StrictMath.cos(0.0) = 1.0
+StrictMath.cosh(0.0) = 1.0
+StrictMath.acos(0.0) = 1.5707963267948966
+StrictMath.tan(0.0) = 0.0
+StrictMath.tanh(0.0) = 0.0
+StrictMath.atan(0.0) = 0.0
+StrictMath.atan2(0.0, 1.0) = 0.0
+StrictMath.sin(0.7853981633974483) = 0.7071067811865475
+StrictMath.sinh(0.7853981633974483) = 0.8686709614860095
+StrictMath.asin(0.7853981633974483) = 0.9033391107665127
+StrictMath.cos(0.7853981633974483) = 0.7071067811865476
+StrictMath.cosh(0.7853981633974483) = 1.3246090892520057
+StrictMath.acos(0.7853981633974483) = 0.6674572160283838
+StrictMath.tan(0.7853981633974483) = 0.9999999999999999
+StrictMath.tanh(0.7853981633974483) = 0.6557942026326724
+StrictMath.atan(0.7853981633974483) = 0.6657737500283538
+StrictMath.atan2(0.7853981633974483, 1.7853981633974483) = 0.41442380057704103
+StrictMath.sin(1.5707963267948966) = 1.0
+StrictMath.sinh(1.5707963267948966) = 2.3012989023072947
+StrictMath.asin(1.5707963267948966) = NaN
+StrictMath.cos(1.5707963267948966) = 6.123233995736766E-17
+StrictMath.cosh(1.5707963267948966) = 2.5091784786580567
+StrictMath.acos(1.5707963267948966) = NaN
+StrictMath.tan(1.5707963267948966) = 1.633123935319537E16
+StrictMath.tanh(1.5707963267948966) = 0.9171523356672744
+StrictMath.atan(1.5707963267948966) = 1.0038848218538872
+StrictMath.atan2(1.5707963267948966, 2.5707963267948966) = 0.5484797644174059
+StrictMath.sin(2.356194490192345) = 0.7071067811865476
+StrictMath.sinh(2.356194490192345) = 5.227971924677803
+StrictMath.asin(2.356194490192345) = NaN
+StrictMath.cos(2.356194490192345) = -0.7071067811865475
+StrictMath.cosh(2.356194490192345) = 5.322752149519959
+StrictMath.acos(2.356194490192345) = NaN
+StrictMath.tan(2.356194490192345) = -1.0000000000000002
+StrictMath.tanh(2.356194490192345) = 0.9821933800072388
+StrictMath.atan(2.356194490192345) = 1.1694228248157563
+StrictMath.atan2(2.356194490192345, 3.356194490192345) = 0.6120961173796371
+StrictMath.sin(3.141592653589793) = 1.2246467991473532E-16
+StrictMath.sinh(3.141592653589793) = 11.548739357257748
+StrictMath.asin(3.141592653589793) = NaN
+StrictMath.cos(3.141592653589793) = -1.0
+StrictMath.cosh(3.141592653589793) = 11.591953275521519
+StrictMath.acos(3.141592653589793) = NaN
+StrictMath.tan(3.141592653589793) = -1.2246467991473532E-16
+StrictMath.tanh(3.141592653589793) = 0.99627207622075
+StrictMath.atan(3.141592653589793) = 1.2626272556789115
+StrictMath.atan2(3.141592653589793, 4.141592653589793) = 0.6489487808147751
+StrictMath.sin(3.9269908169872414) = -0.7071067811865475
+StrictMath.sinh(3.9269908169872414) = 25.367158319374152
+StrictMath.asin(3.9269908169872414) = NaN
+StrictMath.cos(3.9269908169872414) = -0.7071067811865477
+StrictMath.cosh(3.9269908169872414) = 25.386861192360772
+StrictMath.acos(3.9269908169872414) = NaN
+StrictMath.tan(3.9269908169872414) = 0.9999999999999997
+StrictMath.tanh(3.9269908169872414) = 0.9992238948786412
+StrictMath.atan(3.9269908169872414) = 1.3214479677837223
+StrictMath.atan2(3.9269908169872414, 4.926990816987241) = 0.6729312291908799
+StrictMath.sin(4.71238898038469) = -1.0
+StrictMath.sinh(4.71238898038469) = 55.65439759941754
+StrictMath.asin(4.71238898038469) = NaN
+StrictMath.cos(4.71238898038469) = -1.8369701987210297E-16
+StrictMath.cosh(4.71238898038469) = 55.66338089043867
+StrictMath.acos(4.71238898038469) = NaN
+StrictMath.tan(4.71238898038469) = 5.443746451065123E15
+StrictMath.tanh(4.71238898038469) = 0.9998386139886326
+StrictMath.atan(4.71238898038469) = 1.3616916829711636
+StrictMath.atan2(4.71238898038469, 5.71238898038469) = 0.6897654692509959
+StrictMath.sin(5.497787143782138) = -0.7071067811865477
+StrictMath.sinh(5.497787143782138) = 122.07348351469281
+StrictMath.asin(5.497787143782138) = NaN
+StrictMath.cos(5.497787143782138) = 0.7071067811865474
+StrictMath.cosh(5.497787143782138) = 122.07757933958217
+StrictMath.acos(5.497787143782138) = NaN
+StrictMath.tan(5.497787143782138) = -1.0000000000000004
+StrictMath.tanh(5.497787143782138) = 0.9999664489997958
+StrictMath.atan(5.497787143782138) = 1.390871988014422
+StrictMath.atan2(5.497787143782138, 6.497787143782138) = 0.7022263981709682
+StrictMath.sin(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.sinh(6.283185307179586) = 267.74489404101644
+StrictMath.asin(6.283185307179586) = NaN
+StrictMath.cos(6.283185307179586) = 1.0
+StrictMath.cosh(6.283185307179586) = 267.7467614837482
+StrictMath.acos(6.283185307179586) = NaN
+StrictMath.tan(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.tanh(6.283185307179586) = 0.9999930253396107
+StrictMath.atan(6.283185307179586) = 1.4129651365067377
+StrictMath.atan2(6.283185307179586, 7.283185307179586) = 0.7118195495895945
+StrictMath.cbrt(-3.0) = -1.4422495703074083
+StrictMath.log(-3.0) = NaN
+StrictMath.log10(-3.0) = NaN
+StrictMath.log1p(-3.0) = NaN
+StrictMath.exp(-3.0) = 0.049787068367863944
+StrictMath.expm1(-3.0) = -0.950212931632136
+StrictMath.pow(-3.0, -2.0) = 0.1111111111111111
+StrictMath.hypot(-3.0, -2.0) = 3.605551275463989
+StrictMath.cbrt(-2.0) = -1.2599210498948732
+StrictMath.log(-2.0) = NaN
+StrictMath.log10(-2.0) = NaN
+StrictMath.log1p(-2.0) = NaN
+StrictMath.exp(-2.0) = 0.1353352832366127
+StrictMath.expm1(-2.0) = -0.8646647167633873
+StrictMath.pow(-2.0, -1.0) = -0.5
+StrictMath.hypot(-2.0, -1.0) = 2.23606797749979
+StrictMath.cbrt(-1.0) = -1.0
+StrictMath.log(-1.0) = NaN
+StrictMath.log10(-1.0) = NaN
+StrictMath.log1p(-1.0) = -Infinity
+StrictMath.exp(-1.0) = 0.36787944117144233
+StrictMath.expm1(-1.0) = -0.6321205588285577
+StrictMath.pow(-1.0, 0.0) = 1.0
+StrictMath.hypot(-1.0, 0.0) = 1.0
+StrictMath.cbrt(0.0) = 0.0
+StrictMath.log(0.0) = -Infinity
+StrictMath.log10(0.0) = -Infinity
+StrictMath.log1p(0.0) = 0.0
+StrictMath.exp(0.0) = 1.0
+StrictMath.expm1(0.0) = 0.0
+StrictMath.pow(0.0, 1.0) = 0.0
+StrictMath.hypot(0.0, 1.0) = 1.0
+StrictMath.cbrt(1.0) = 1.0
+StrictMath.log(1.0) = 0.0
+StrictMath.log10(1.0) = 0.0
+StrictMath.log1p(1.0) = 0.6931471805599453
+StrictMath.exp(1.0) = 2.7182818284590455
+StrictMath.expm1(1.0) = 1.718281828459045
+StrictMath.pow(1.0, 2.0) = 1.0
+StrictMath.hypot(1.0, 2.0) = 2.23606797749979
+StrictMath.cbrt(2.0) = 1.2599210498948732
+StrictMath.log(2.0) = 0.6931471805599453
+StrictMath.log10(2.0) = 0.3010299956639812
+StrictMath.log1p(2.0) = 1.0986122886681096
+StrictMath.exp(2.0) = 7.38905609893065
+StrictMath.expm1(2.0) = 6.38905609893065
+StrictMath.pow(2.0, 3.0) = 8.0
+StrictMath.hypot(2.0, 3.0) = 3.605551275463989
+StrictMath.cbrt(3.0) = 1.4422495703074083
+StrictMath.log(3.0) = 1.0986122886681096
+StrictMath.log10(3.0) = 0.47712125471966244
+StrictMath.log1p(3.0) = 1.3862943611198906
+StrictMath.exp(3.0) = 20.085536923187668
+StrictMath.expm1(3.0) = 19.085536923187668
+StrictMath.pow(3.0, 4.0) = 81.0
+StrictMath.hypot(3.0, 4.0) = 5.0
+StrictMath.ceil(0.0001) = 1.0
+StrictMath.floor(0.0001) = 0.0
+StrictMath.nextAfter(1.0, 2.0) = 1.0000000000000002
+StrictMath.rint(0.5000001) = 1.0
diff --git a/test/123-inline-execute2/info.txt b/test/123-inline-execute2/info.txt
new file mode 100644
index 0000000..4a728a7
--- /dev/null
+++ b/test/123-inline-execute2/info.txt
@@ -0,0 +1 @@
+Sanity checks for added InlineNative methods.
diff --git a/test/123-inline-execute2/src/Main.java b/test/123-inline-execute2/src/Main.java
new file mode 100644
index 0000000..9fadcfd
--- /dev/null
+++ b/test/123-inline-execute2/src/Main.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Locale;
+
+public class Main {
+  public static void main(String args[]) {
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (Math.PI / 180.0);
+      System.out.println("Math.sin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sin(d)));
+
+      System.out.println("Math.sinh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sinh(d)));
+      System.out.println("Math.asin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.asin(d)));
+      System.out.println("Math.cos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cos(d)));
+      System.out.println("Math.cosh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cosh(d)));
+      System.out.println("Math.acos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.acos(d)));
+      if ((i + 90) % 180 != 0) {
+        System.out.println("Math.tan(" + d + ") = "
+            + String.format(Locale.US, "%.12f", Math.tan(d)));
+      }
+      System.out.println("Math.tanh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.tanh(d)));
+      System.out.println("Math.atan(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan(d)));
+      System.out.println("Math.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan2(d, d + 1.0)));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("Math.cbrt(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.cbrt(e)));
+      System.out.println("Math.log(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log(e)));
+      System.out.println("Math.log10(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log10(e)));
+      System.out.println("Math.log1p(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log1p(e)));
+      System.out.println("Math.exp(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.exp(e)));
+      System.out.println("Math.expm1(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.expm1(e)));
+      System.out.println("Math.pow(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.pow(e, e + 1.0)));
+      System.out.println("Math.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.hypot(e, e + 1.0)));
+    }
+
+    System.out.println("Math.ceil(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.ceil(0.0001)));
+    System.out.println("Math.floor(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.floor(0.0001)));
+    System.out.println("Math.nextAfter(1.0, 2.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(1.0, 2.0)));
+    System.out.println("Math.nextAfter(2.0, 1.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(2.0, 1.0)));
+    System.out.println("Math.rint(0.5000001) = "
+        + String.format(Locale.US, "%.12f", Math.rint(0.5000001)));
+
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (StrictMath.PI / 180.0);
+      System.out.println("StrictMath.sin(" + d + ") = " + StrictMath.sin(d));
+      System.out.println("StrictMath.sinh(" + d + ") = " + StrictMath.sinh(d));
+      System.out.println("StrictMath.asin(" + d + ") = " + StrictMath.asin(d));
+      System.out.println("StrictMath.cos(" + d + ") = " + StrictMath.cos(d));
+      System.out.println("StrictMath.cosh(" + d + ") = " + StrictMath.cosh(d));
+      System.out.println("StrictMath.acos(" + d + ") = " + StrictMath.acos(d));
+      System.out.println("StrictMath.tan(" + d + ") = " + StrictMath.tan(d));
+      System.out.println("StrictMath.tanh(" + d + ") = " + StrictMath.tanh(d));
+      System.out.println("StrictMath.atan(" + d + ") = " + StrictMath.atan(d));
+      System.out.println("StrictMath.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + StrictMath.atan2(d, d + 1.0));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("StrictMath.cbrt(" + e + ") = " + StrictMath.cbrt(e));
+      System.out.println("StrictMath.log(" + e + ") = " + StrictMath.log(e));
+      System.out.println("StrictMath.log10(" + e + ") = " + StrictMath.log10(e));
+      System.out.println("StrictMath.log1p(" + e + ") = " + StrictMath.log1p(e));
+      System.out.println("StrictMath.exp(" + e + ") = " + StrictMath.exp(e));
+      System.out.println("StrictMath.expm1(" + e + ") = " + StrictMath.expm1(e));
+      System.out.println("StrictMath.pow(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.pow(e, e + 1.0));
+      System.out.println("StrictMath.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.hypot(e, e + 1.0));
+    }
+
+    System.out.println("StrictMath.ceil(0.0001) = " + StrictMath.ceil(0.0001));
+    System.out.println("StrictMath.floor(0.0001) = " + StrictMath.floor(0.0001));
+    System.out.println("StrictMath.nextAfter(1.0, 2.0) = " + StrictMath.nextAfter(1.0, 2.0));
+    System.out.println("StrictMath.rint(0.5000001) = " + StrictMath.rint(0.5000001));
+  }
+
+}
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index 32122e4..865355c 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -16,11 +16,11 @@
 
 public class Main {
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main keepTest(Main m) {
@@ -31,7 +31,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.thisTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.thisTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main thisTest() {
@@ -45,7 +45,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main newInstanceRemoveTest() {
     Main m = new Main();
@@ -57,7 +57,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (after)
   /// CHECK:         NewArray
   /// CHECK-NOT:     NullCheck
   /// CHECK:         ArrayGet
@@ -66,11 +66,11 @@
     return ms[0];
   }
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK-NOT:     NullCheck
   public Main ifRemoveTest(boolean flag) {
@@ -83,11 +83,11 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
   public Main ifKeepTest(boolean flag) {
@@ -98,10 +98,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main forRemoveTest(int count) {
     Main a = new Main();
@@ -114,10 +114,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main forKeepTest(int count) {
     Main a = new Main();
@@ -132,10 +132,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main phiFlowRemoveTest(int count) {
     Main a = new Main();
@@ -154,10 +154,10 @@
     return n.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main phiFlowKeepTest(int count) {
     Main a = new Main();
@@ -181,7 +181,7 @@
   /// CHECK-START: Main Main.scopeRemoveTest(int, Main) ssa_builder (after)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeRemoveTest(int count, Main a) {
     Main m = null;
@@ -196,10 +196,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeKeepTest(int count, Main a) {
     Main m = new Main();
@@ -214,10 +214,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeIfNotNullRemove(Main m) {
     if (m != null) {
@@ -226,10 +226,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeIfKeep(Main m) {
     if (m == null) {
@@ -258,11 +258,11 @@
 class ListElement {
   private ListElement next;
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (before)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         NullCheck
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (after)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   static boolean isShorter(ListElement x, ListElement y) {
     ListElement xTail = x;
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index f1f80ca..fd4dd5e 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -72,49 +72,49 @@
 
 public class Main {
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testSimpleRemove() {
     Super s = new SubclassA();
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testSimpleKeep(Super s) {
     ((SubclassA)s).$noinline$f();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public String testClassRemove() {
     Object s = SubclassA.class;
     return ((Class)s).getName();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (after)
   /// CHECK:         CheckCast
   public String testClassKeep() {
     Object s = SubclassA.class;
     return ((SubclassA)s).$noinline$h();
   }
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testIfRemove(int x) {
     Super s;
@@ -126,10 +126,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testIfKeep(int x) {
     Super s;
@@ -141,10 +141,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testForRemove(int x) {
     Super s = new SubclassA();
@@ -156,10 +156,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testForKeep(int x) {
     Super s = new SubclassA();
@@ -171,10 +171,10 @@
     ((SubclassC)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testPhiFromCall(int i) {
     Object x;
@@ -186,11 +186,12 @@
     ((SubclassC)x).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf(Object o) {
     if (o instanceof SubclassC) {
@@ -201,11 +202,101 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (before)
+  public static boolean $inline$InstanceofSubclassB(Object o) { return o instanceof SubclassB; }
+  public static boolean $inline$InstanceofSubclassC(Object o) { return o instanceof SubclassC; }
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
+  /// CHECK-DAG:                   NotEqual [<<IOf1>>,<<Cst1>>]
+  /// CHECK-DAG:     <<IOf2:z\d+>> InstanceOf
+  /// CHECK-DAG:                   Equal [<<IOf2>>,<<Cst0>>]
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (before)
+  /// CHECK:         CheckCast
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (after)
+  /// CHECK-NOT:     CheckCast
+  public void testInstanceOf_NotInlined(Object o) {
+    if ((o instanceof SubclassC) == true) {
+      ((SubclassC)o).$noinline$g();
+    }
+    if ((o instanceof SubclassB) != false) {
+      ((SubclassB)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
+  /// CHECK-DAG:                   Equal [<<IOf1>>,<<Cst1>>]
+  /// CHECK-DAG:     <<IOf2:z\d+>> InstanceOf
+  /// CHECK-DAG:                   NotEqual [<<IOf2>>,<<Cst0>>]
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (before)
+  /// CHECK:         CheckCast
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (after)
+  /// CHECK-NOT:     CheckCast
+  public void testNotInstanceOf_NotInlined(Object o) {
+    if ((o instanceof SubclassC) != true) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassC)o).$noinline$g();
+    }
+    if ((o instanceof SubclassB) == false) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassB)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) inliner (after)
+  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+  /// CHECK-DAG:                   If [<<IOf>>]
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:     CheckCast
+  public void testInstanceOf_Inlined(Object o) {
+    if (!$inline$InstanceofSubclassC(o)) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassC)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) inliner (after)
+  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+  /// CHECK-DAG:     <<Not:z\d+>>  BooleanNot [<<IOf>>]
+  /// CHECK-DAG:                   If [<<Not>>]
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:     CheckCast
+  public void testNotInstanceOf_Inlined(Object o) {
+    if ($inline$InstanceofSubclassC(o)) {
+      ((SubclassC)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (after)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
   public void testInstanceOfKeep(Object o) {
@@ -217,11 +308,11 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfNested(Object o) {
     if (o instanceof SubclassC) {
@@ -233,10 +324,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhi(int i) {
     Object o;
@@ -251,10 +342,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfInFor(int n) {
     Object o = new SubclassA();
@@ -268,10 +359,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclass() {
     Object o = new SubclassA();
@@ -280,10 +371,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiSubclass(int i) {
     Object o;
@@ -298,10 +389,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiTop(int i) {
     Object o;
@@ -316,10 +407,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclassInFor(int n) {
     Object o = new SubclassA();
@@ -333,10 +424,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfTopInFor(int n) {
     Object o = new SubclassA();
@@ -361,10 +452,10 @@
   public SubclassA a = new SubclassA();
   public static SubclassA b = new SubclassA();
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceFieldGetSimpleRemove() {
     Main m = new Main();
@@ -372,10 +463,10 @@
     ((SubclassA)a).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testStaticFieldGetSimpleRemove() {
     Super b = Main.b;
@@ -384,36 +475,36 @@
 
   public SubclassA $noinline$getSubclass() { throw new RuntimeException(); }
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArraySimpleRemove() {
     Super[] b = new SubclassA[10];
     SubclassA[] c = (SubclassA[])b;
   }
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInvokeSimpleRemove() {
     Super b = $noinline$getSubclass();
     ((SubclassA)b).$noinline$g();
   }
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArrayGetSimpleRemove() {
     Super[] a = new SubclassA[10];
     ((SubclassA)a[0]).$noinline$g();
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) ssa_builder (after)
   /// CHECK:         LoadException klass:java.lang.ArithmeticException can_be_null:false exact:false
   public int testLoadExceptionInCatchNonExact(int x, int y) {
     try {
@@ -423,7 +514,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) ssa_builder (after)
   /// CHECK:         LoadException klass:FinalException can_be_null:false exact:true
   public int testLoadExceptionInCatchExact(int x) {
     try {
@@ -437,7 +528,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) ssa_builder (after)
   /// CHECK:         LoadException klass:java.lang.Throwable can_be_null:false exact:false
   public int testLoadExceptionInCatchAll(int x, int y) {
     try {
@@ -458,7 +549,7 @@
     return genericFinal.get();
   }
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() ssa_builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -470,7 +561,7 @@
     return c;
   }
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() ssa_builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -512,7 +603,7 @@
     return new SubclassA();
   }
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:Super
   /// CHECK:                   NullCheck [<<Phi>>] klass:Super
 
@@ -534,7 +625,7 @@
   /// CHECK:                        CheckCast [<<Param>>,<<Clazz>>]
   /// CHECK:                        BoundType [<<Param>>] can_be_null:true
 
-  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier (after)
   /// CHECK:      <<This:l\d+>>     ParameterValue
   /// CHECK:      <<Param:l\d+>>    ParameterValue
   /// CHECK:      <<Clazz:l\d+>>    LoadClass
@@ -546,7 +637,7 @@
   }
 
 
-  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) reference_type_propagation (after)
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) ssa_builder (after)
   /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
   /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
   /// CHECK:      ParameterValue
@@ -562,7 +653,7 @@
 
   private int mainField = 0;
 
-  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) reference_type_propagation (after)
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
@@ -618,7 +709,7 @@
     getSuper();
   }
 
-  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) ssa_builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
@@ -631,7 +722,7 @@
     }
   }
 
-  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) ssa_builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
   /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
@@ -647,7 +738,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() ssa_builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
   /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
diff --git a/test/477-checker-bound-type/src/Main.java b/test/477-checker-bound-type/src/Main.java
index c873702..0f65e44 100644
--- a/test/477-checker-bound-type/src/Main.java
+++ b/test/477-checker-bound-type/src/Main.java
@@ -17,7 +17,7 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) ssa_builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForIf(Object a) {
     if (a != null) {
@@ -27,7 +27,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
@@ -37,7 +37,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) ssa_builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForIf(Object a) {
     if (a == null) {
@@ -47,7 +47,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 17e88ce..ced3e50 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -25,6 +25,9 @@
 }
 
 class TestClass {
+  static {
+    sTestClassObj = new TestClass(-1, -2);
+  }
   TestClass() {
   }
   TestClass(int i, int j) {
@@ -37,6 +40,7 @@
   TestClass next;
   String str;
   static int si;
+  static TestClass sTestClassObj;
 }
 
 class SubTestClass extends TestClass {
@@ -115,10 +119,11 @@
   }
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
+  /// CHECK: StaticFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldGet
@@ -127,24 +132,31 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
-  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: StaticFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
+  /// CHECK-NOT: StaticFieldGet
 
-  // A new allocation shouldn't alias with pre-existing values.
+  // A new allocation (even non-singleton) shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
     // Do an allocation here to avoid the HLoadClass and HClinitCheck
     // at the second allocation.
     new TestClass();
+    TestClass obj1 = TestClass.sTestClassObj;
+    TestClass obj2 = new TestClass();  // Cannot alias with obj or obj1 which pre-exist.
+    obj.next = obj2;  // Make obj2 a non-singleton.
+    // All stores below need to stay since obj/obj1/obj2 are not singletons.
     obj.i = 1;
-    obj.next.j = 2;
-    TestClass obj2 = new TestClass();
+    obj1.j = 2;
+    // Following stores won't kill values of obj.i and obj1.j.
     obj2.i = 3;
     obj2.j = 4;
-    return obj.i + obj.next.j + obj2.i + obj2.j;
+    return obj.i + obj1.j + obj2.i + obj2.j;
   }
 
   /// CHECK-START: int Main.test4(TestClass, boolean) load_store_elimination (before)
@@ -441,16 +453,14 @@
   }
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (before)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (after)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK-NOT: {{f\d+}} ArrayGet
 
-  // I/F, J/D aliasing should keep the load/store.
+  // I/F, J/D aliasing should not happen any more and LSE should eliminate the load.
   static float test19(float[] fa1, float[] fa2) {
     fa1[0] = fa2[0];
     return fa1[0];
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index e9f16c0..9a9f0b6 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -21,14 +21,14 @@
 }
 
 public class Main {
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
   /// CHECK:    <<Ret:l\d+>>     BoundType [<<Phi>>] klass:Final
   /// CHECK:                     Return [<<Ret>>]
 
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
@@ -43,7 +43,7 @@
     return (Final) x;
   }
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
@@ -65,7 +65,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
   /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
diff --git a/test/542-bitfield-rotates/expected.txt b/test/542-bitfield-rotates/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/542-bitfield-rotates/expected.txt
diff --git a/test/542-bitfield-rotates/info.txt b/test/542-bitfield-rotates/info.txt
new file mode 100644
index 0000000..961be3b
--- /dev/null
+++ b/test/542-bitfield-rotates/info.txt
@@ -0,0 +1 @@
+Tests bitfield rotate simplification in optimizing compiler.
diff --git a/test/542-bitfield-rotates/src/Main.java b/test/542-bitfield-rotates/src/Main.java
new file mode 100644
index 0000000..f2bc153
--- /dev/null
+++ b/test/542-bitfield-rotates/src/Main.java
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void main(String args[]) throws Exception {
+    test_Integer_right_v_csubv();
+    test_Long_right_v_csubv();
+
+    test_Integer_right_constant_v();
+    test_Long_right_constant_v();
+
+    test_Integer_left_csubv_v();
+    test_Long_left_csubv_v();
+
+    test_Integer_right_v_negv();
+    test_Long_right_v_negv();
+
+    test_Integer_left_negv_v();
+    test_Long_left_negv_v();
+
+    test_Integer_left_constant_v();
+    test_Long_left_constant_v();
+  }
+
+  public static boolean doThrow = false;
+
+  public static int $noinline$rotate_int_right_reg_v_csubv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  public static void test_Integer_right_v_csubv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_csubv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  public static void test_Long_right_v_csubv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_csubv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  public static void test_Integer_left_csubv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_csubv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  public static void test_Long_left_csubv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_reg_v_negv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << -distance);
+  }
+
+  public static void test_Integer_right_v_negv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_negv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << -distance);
+  }
+
+  public static void test_Long_right_v_negv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_negv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Integer_left_negv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_negv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Long_left_negv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static int $noinline$rotate_int_right_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static void test_Integer_right_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_right_constant_1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_constant_m1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_right_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_right_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static long $noinline$rotate_long_right_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static long $noinline$rotate_long_right_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 32) | (value << -32);
+  }
+
+  public static long $noinline$rotate_long_right_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 48) | (value << -48);
+  }
+
+  public static long $noinline$rotate_long_right_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 64) | (value << -64);
+  }
+
+  public static void test_Long_right_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_right_constant_1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_constant_m1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_right_constant_16(0x11), 0x11000000000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_48(0x11), 0x110000L);
+  }
+
+  public static int $noinline$rotate_int_left_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static int $noinline$rotate_int_left_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static void test_Integer_left_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_left_constant_1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_left_constant_m1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_left_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static long $noinline$rotate_long_left_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static long $noinline$rotate_long_left_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 32) | (value >>> -32);
+  }
+
+  public static long $noinline$rotate_long_left_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 48) | (value >>> -48);
+  }
+
+  public static long $noinline$rotate_long_left_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 64) | (value >>> -64);
+  }
+
+  public static void test_Long_left_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_left_constant_1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_left_constant_m1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_constant_16(0x11), 0x110000L);
+    assertLongEquals($noinline$rotate_long_left_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_left_constant_48(0x11), 0x11000000000000L);
+  }
+
+}
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
index dc27f10..917073b 100644
--- a/test/549-checker-types-merge/src/Main.java
+++ b/test/549-checker-types-merge/src/Main.java
@@ -38,14 +38,14 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeNullContant(boolean cond) {
     return cond ? null : new Main();
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
@@ -53,7 +53,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
@@ -61,7 +61,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
@@ -69,7 +69,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
@@ -77,7 +77,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
@@ -85,7 +85,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
@@ -93,7 +93,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
@@ -101,7 +101,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
@@ -109,7 +109,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
@@ -117,7 +117,7 @@
     return cond ? a : b;
   }
 
-    /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) reference_type_propagation (after)
+    /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
diff --git a/test/552-checker-primitive-typeprop/expected.txt b/test/552-checker-primitive-typeprop/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/expected.txt
diff --git a/test/552-checker-primitive-typeprop/info.txt b/test/552-checker-primitive-typeprop/info.txt
new file mode 100644
index 0000000..9d69056
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/info.txt
@@ -0,0 +1,2 @@
+Test that phis with environment uses which can be properly typed are kept
+in --debuggable mode.
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/ArrayGet.smali b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
new file mode 100644
index 0000000..042fa0c
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
@@ -0,0 +1,245 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LArrayGet;
+.super Ljava/lang/Object;
+
+
+# Test phi with fixed-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFixedType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with fixed-type ArrayGet as an input and a conflicting second input.
+# The phi should be eliminated due to the conflict.
+
+## CHECK-START: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static conflictingFixedType([FI)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  if-eqz p1, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Same test as the one above, only this time tests that type of ArrayGet is not
+# changed.
+
+## CHECK-START: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK:     {{i\d+}} ArrayGet
+.method public static conflictingFixedType2([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-int v2, v0, v1    # int use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:                ArraySet [{{l\d+}},{{i\d+}},<<Aget>>]
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFreeType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be float but has no typed use
+  aput v0, p0, v1       # aput does not disambiguate the type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a conflicting second input.
+# The phi will be kept and typed according to the second input despite the
+# conflict.
+
+## CHECK-START: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+.method public static conflictingFreeType([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be int but has no typed use
+  aput v0, p0, v1
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test that real use of ArrayGet is propagated through phis. The following test
+# case uses ArrayGet indirectly through two phis. It also creates an unused
+# conflicting phi which should not be preserved.
+
+## CHECK-START: void ArrayGet.conflictingPhiUses(int[], float, boolean, boolean, boolean) ssa_builder (after)
+## CHECK:         InvokeStaticOrDirect env:[[{{i\d+}},{{i\d+}},_,{{i\d+}},{{.*}}
+
+.method public static conflictingPhiUses([IFZZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # Create v1 = Phi [0x0, int ArrayGet]
+  move v1, v0
+  if-eqz p2, :else1
+  aget v1, p0, v0
+  :else1
+
+  # Create v2 = Phi [v1, float]
+  move v2, v1
+  if-eqz p3, :else2
+  move v2, p1
+  :else2
+
+  # Create v3 = Phi [v1, int]
+  move v3, v1
+  if-eqz p4, :else3
+  move v3, v0
+  :else3
+
+  # Use v3 as int.
+  add-int/lit8 v4, v3, 0x2a
+
+  # Create env uses.
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+
+  return-void
+.end method
+
+# Test that the right ArrayGet equivalent is always selected. The following test
+# case uses ArrayGet as float through one phi and as an indeterminate type through
+# another. The situation needs to be resolved so that only one instruction
+# remains.
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK:         {{f\d+}} ArrayGet
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK-NOT:     {{i\d+}} ArrayGet
+
+.method public static typedVsUntypedPhiUse([FFZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # v1 = float ArrayGet
+  aget v1, p0, v0
+
+  # Create v2 = Phi [v1, 0.0f]
+  move v2, v1
+  if-eqz p2, :else1
+  move v2, v0
+  :else1
+
+  # Use v2 as float
+  cmpl-float v2, v2, p1
+
+  # Create v3 = Phi [v1, 0.0f]
+  move v3, v1
+  if-eqz p3, :else2
+  move v3, v0
+  :else2
+
+  # Use v3 without a determinate type.
+  aput v3, p0, v0
+
+  return-void
+.end method
diff --git a/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
new file mode 100644
index 0000000..395feaa
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
@@ -0,0 +1,52 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSsaBuilder;
+.super Ljava/lang/Object;
+
+# Check that a dead phi with a live equivalent is replaced in an environment. The
+# following test case throws an exception and uses v0 afterwards. However, v0
+# contains a phi that is interpreted as int for the environment, and as float for
+# instruction use. SsaBuilder must substitute the int variant before removing it,
+# otherwise running the code with an array short enough to throw will crash at
+# runtime because v0 is undefined.
+
+## CHECK-START: int SsaBuilder.environmentPhi(boolean, int[]) ssa_builder (after)
+## CHECK-DAG:     <<Cst0:f\d+>>  FloatConstant 0
+## CHECK-DAG:     <<Cst2:f\d+>>  FloatConstant 2
+## CHECK-DAG:     <<Phi:f\d+>>   Phi [<<Cst0>>,<<Cst2>>]
+## CHECK-DAG:                    BoundsCheck env:[[<<Phi>>,{{i\d+}},{{z\d+}},{{l\d+}}]]
+
+.method public static environmentPhi(Z[I)I
+  .registers 4
+
+  const v0, 0x0
+  if-eqz p0, :else
+  const v0, 0x40000000
+  :else
+  # v0 = phi that can be both int and float
+
+  :try_start
+  const v1, 0x3
+  aput v1, p1, v1
+  const v0, 0x1     # generate catch phi for v0
+  const v1, 0x4
+  aput v1, p1, v1
+  :try_end
+  .catchall {:try_start .. :try_end} :use_as_float
+
+  :use_as_float
+  float-to-int v0, v0
+  return v0
+.end method
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/TypePropagation.smali b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
new file mode 100644
index 0000000..58682a1
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
@@ -0,0 +1,136 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTypePropagation;
+.super Ljava/lang/Object;
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDeadPhi(boolean, boolean, int, float, float) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDeadPhi(ZZIFF)V
+  .registers 8
+
+  if-eqz p0, :after1
+  move p2, p3
+  :after1
+  # p2 = merge(int,float) = conflict
+
+  if-eqz p1, :after2
+  move p2, p4
+  :after2
+  # p2 = merge(conflict,float) = conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeSameType(boolean, int, int) ssa_builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeSameType(ZII)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,int) = int
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeVoidInput(boolean, boolean, int, int) ssa_builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeVoidInput(ZZII)V
+  .registers 8
+  :loop
+  # p2 = void (loop phi) => p2 = merge(int,int) = int
+  if-eqz p0, :after
+  move p2, p3
+  :after
+  # p2 = merge(void,int) = int
+  if-eqz p1, :loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDifferentSize(boolean, int, long) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDifferentSize(ZIJ)V
+  .registers 8
+  if-eqz p0, :after
+  move-wide p1, p2
+  :after
+  # p1 = merge(int,long) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeRefFloat(boolean, float, java.lang.Object) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeRefFloat(ZFLjava/lang/Object;)V
+  .registers 8
+  if-eqz p0, :after
+  move-object p1, p2
+  :after
+  # p1 = merge(float,reference) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Success(boolean, float) ssa_builder (after)
+## CHECK:     {{f\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeIntFloat_Success(ZF)V
+  .registers 8
+  if-eqz p0, :after
+  const/4 p1, 0x0
+  :after
+  # p1 = merge(float,0x0) = float
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Fail(boolean, int, float) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeIntFloat_Fail(ZIF)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,float) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.updateAllUsersOnConflict(boolean, boolean, int, float, int) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static updateAllUsersOnConflict(ZZIFI)V
+  .registers 8
+
+  :loop1
+  # loop phis for all args
+  # p2 = merge(int,float) = float? => conflict
+  move p2, p3
+  if-eqz p0, :loop1
+
+  :loop2
+  # loop phis for all args
+  # requests float equivalent of p4 phi in loop1 => conflict
+  # propagates conflict to loop2's phis
+  move p2, p4
+  if-eqz p1, :loop2
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
diff --git a/test/552-checker-primitive-typeprop/src/Main.java b/test/552-checker-primitive-typeprop/src/Main.java
new file mode 100644
index 0000000..fe2343e
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result, expected=" + expected + ", actual=" + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("SsaBuilder");
+    Method m = c.getMethod("environmentPhi", new Class[] { boolean.class, int[].class });
+
+    int[] array = new int[3];
+    int result;
+
+    result = (Integer) m.invoke(null, new Object[] { true, array } );
+    assertEquals(2, result);
+
+    result = (Integer) m.invoke(null, new Object[] { false, array } );
+    assertEquals(0, result);
+  }
+}
diff --git a/test/554-jit-profile-file/expected.txt b/test/554-jit-profile-file/expected.txt
new file mode 100644
index 0000000..cde211e
--- /dev/null
+++ b/test/554-jit-profile-file/expected.txt
@@ -0,0 +1,7 @@
+JNI_OnLoad called
+ProfileInfo:
+:classes.dex
+  java.lang.String Main.hotMethod()
+  void Main.main(java.lang.String[])
+:classes2.dex
+  java.lang.String OtherDex.hotMethod()
diff --git a/test/554-jit-profile-file/info.txt b/test/554-jit-profile-file/info.txt
new file mode 100644
index 0000000..b1bfe81
--- /dev/null
+++ b/test/554-jit-profile-file/info.txt
@@ -0,0 +1 @@
+Check that saving and restoring profile files works correctly in a JIT environment.
diff --git a/test/554-jit-profile-file/offline_profile.cc b/test/554-jit-profile-file/offline_profile.cc
new file mode 100644
index 0000000..75e441f
--- /dev/null
+++ b/test/554-jit-profile-file/offline_profile.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file.h"
+
+#include "jit/offline_profiling_info.h"
+#include "jni.h"
+#include "mirror/class-inl.h"
+#include "oat_file_assistant.h"
+#include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_getProfileInfoDump(
+      JNIEnv* env, jclass cls, jstring filename) {
+  std::string dex_location;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    dex_location = soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetDexFile()->GetLocation();
+  }
+  const OatFile* oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile();
+  std::vector<std::unique_ptr<const DexFile>> dex_files =
+      OatFileAssistant::LoadDexFiles(*oat_file, dex_location.c_str());
+  const char* filename_chars = env->GetStringUTFChars(filename, nullptr);
+
+  std::vector<const DexFile*> dex_files_raw;
+  for (size_t i = 0; i < dex_files.size(); i++) {
+    dex_files_raw.push_back(dex_files[i].get());
+  }
+
+  ProfileCompilationInfo info(filename_chars);
+
+  std::string result = info.Load(dex_files_raw)
+      ? info.DumpInfo(/*print_full_dex_location*/false)
+      : "Could not load profile info";
+
+  env->ReleaseStringUTFChars(filename, filename_chars);
+  // Return the dump of the profile info. It will be compared against a golden value.
+  return env->NewStringUTF(result.c_str());
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/554-jit-profile-file/run b/test/554-jit-profile-file/run
new file mode 100644
index 0000000..f93b32f
--- /dev/null
+++ b/test/554-jit-profile-file/run
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} \
+  -Xcompiler-option --compiler-filter=interpret-only \
+  --runtime-option -Xjitsaveprofilinginfo \
+  --runtime-option -Xusejit:true \
+  --runtime-option -Xjitwarmupthreshold:2 \
+  --runtime-option -Xjitthreshold:4 \
+  "${@}"
diff --git a/test/554-jit-profile-file/src-multidex/OtherDex.java b/test/554-jit-profile-file/src-multidex/OtherDex.java
new file mode 100644
index 0000000..51644db
--- /dev/null
+++ b/test/554-jit-profile-file/src-multidex/OtherDex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+
+public class OtherDex {
+  public void coldMethod() {
+    hotMethod();
+  }
+
+  public String hotMethod() {
+    HashMap<String, String> map = new HashMap<String, String>();
+    for (int i = 0; i < 10; i++) {
+      map.put("" + i, "" + i + 1);
+    }
+    return map.get("1");
+  }
+}
diff --git a/test/554-jit-profile-file/src/Main.java b/test/554-jit-profile-file/src/Main.java
new file mode 100644
index 0000000..98297ed
--- /dev/null
+++ b/test/554-jit-profile-file/src/Main.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.HashMap;
+
+public class Main {
+
+  public void coldMethod() {
+    hotMethod();
+  }
+
+  public String hotMethod() {
+    HashMap<String, String> map = new HashMap<String, String>();
+    for (int i = 0; i < 10; i++) {
+      map.put("" + i, "" + i + 1);
+    }
+    return map.get("1");
+  }
+
+  private static final String PKG_NAME = "test.package";
+  private static final String APP_DIR_PREFIX = "app_dir_";
+  private static final String CODE_CACHE = "code_cache";
+  private static final String PROFILE_FILE = PKG_NAME + ".prof";
+  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
+  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
+  private static final int JIT_INVOCATION_COUNT = 200;
+
+  /* needs to match Runtime:: kProfileBackground */
+  private static final int PROFILE_BACKGROUND = 1;
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+
+    File file = null;
+    File appDir = null;
+    File profileDir = null;
+    File profileFile = null;
+    try {
+      // We don't know where we have rights to create the code_cache. So create
+      // a dummy temporary file and get its parent directory. That will serve as
+      // the app directory.
+      file = createTempFile();
+      appDir = new File(file.getParent(), APP_DIR_PREFIX + file.getName());
+      appDir.mkdir();
+      profileDir = new File(appDir, CODE_CACHE);
+      profileDir.mkdir();
+
+      // Registering the app info will set the profile file name.
+      VMRuntime.registerAppInfo(PKG_NAME, appDir.getPath());
+
+      // Make sure the hot methods are jitted.
+      Main m = new Main();
+      OtherDex o = new OtherDex();
+      for (int i = 0; i < JIT_INVOCATION_COUNT; i++) {
+        m.hotMethod();
+        o.hotMethod();
+      }
+
+      // Sleep for 2 second to make sure that the methods had a chance to get compiled.
+      Thread.sleep(2000);
+      // Updating the process state to BACKGROUND will trigger profile saving.
+      VMRuntime.updateProcessState(PROFILE_BACKGROUND);
+
+      // Check that the profile file exists.
+      profileFile = new File(profileDir, PROFILE_FILE);
+      if (!profileFile.exists()) {
+        throw new RuntimeException("No profile file found");
+      }
+      // Dump the profile file.
+      // We know what methods are hot and we compare with the golden `expected` output.
+      System.out.println(getProfileInfoDump(profileFile.getPath()));
+    } finally {
+      if (file != null) {
+        file.delete();
+      }
+      if (profileFile != null) {
+        profileFile.delete();
+      }
+      if (profileDir != null) {
+        profileDir.delete();
+      }
+      if (appDir != null) {
+        appDir.delete();
+      }
+    }
+  }
+
+  private static class VMRuntime {
+    private static final Method registerAppInfoMethod;
+    private static final Method updateProcessStateMethod;
+    private static final Method getRuntimeMethod;
+    static {
+      try {
+        Class c = Class.forName("dalvik.system.VMRuntime");
+        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
+            String.class, String.class, String.class);
+        updateProcessStateMethod = c.getDeclaredMethod("updateProcessState", Integer.TYPE);
+        getRuntimeMethod = c.getDeclaredMethod("getRuntime");
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void registerAppInfo(String pkgName, String appDir) throws Exception {
+      registerAppInfoMethod.invoke(null, pkgName, appDir, null);
+    }
+    public static void updateProcessState(int state) throws Exception {
+      Object runtime = getRuntimeMethod.invoke(null);
+      updateProcessStateMethod.invoke(runtime, state);
+    }
+  }
+
+  static native String getProfileInfoDump(
+      String filename);
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+}
diff --git a/test/557-checker-instruction-simplifier-ror/expected.txt b/test/557-checker-instruction-simplifier-ror/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/expected.txt
diff --git a/test/557-checker-instruction-simplifier-ror/info.txt b/test/557-checker-instruction-simplifier-ror/info.txt
new file mode 100644
index 0000000..f9a86f8
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/info.txt
@@ -0,0 +1 @@
+Tests simplification of bitfield rotate patterns in optimizing compiler.
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
new file mode 100644
index 0000000..027f262
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateRight
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerRight(int value, int distance) {
+    return java.lang.Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateLeft
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerLeft(int value, int distance) {
+    return java.lang.Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateRight
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongRight(long value, int distance) {
+    return java.lang.Long.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateLeft
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongLeft(long value, int distance) {
+    return java.lang.Long.rotateLeft(value, distance);
+  }
+
+  //  (i >>> #distance) | (i << #(reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const30:i\d+>>      IntConstant 30
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Const30>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c(int value) {
+    return (value >>> 2) | (value << 30);
+  }
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c_0(int value) {
+    return (value >>> 2) | (value << 62);
+  }
+
+  //  (j >>> #distance) | (j << #(reg_bits - distance))
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const62:i\d+>>      IntConstant 62
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Const62>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_c(long value) {
+    return (value >>> 2) | (value << 62);
+  }
+
+  /// CHECK-START: long Main.ror_long_constant_c_c_0(long) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_constant_c_c_0(long value) {
+    return (value >>> 2) | (value << 30);
+  }
+
+  //  (i >>> #distance) | (i << #-distance)
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_negc(int value) {
+    return (value >>> 2) | (value << -2);
+  }
+
+  //  (j >>> #distance) | (j << #-distance)
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_negc(long value) {
+    return (value >>> 2) | (value << -2);
+  }
+
+  //  (i >>> distance) | (i << (#reg_bits - distance)
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static int ror_int_reg_v_csubv(int value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  //  (distance = x - y)
+  //  (i >>> distance) | (i << (#reg_bits - distance)
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK-NOT:      Sub
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Ror>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv_env(int value, int x, int y) {
+    int distance = x - y;
+    int bits_minus_dist = 32 - distance;
+    return ((value >>> distance) | (value << bits_minus_dist)) + bits_minus_dist;
+  }
+
+  //  (j >>> distance) | (j << (#reg_bits - distance)
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static long ror_long_reg_v_csubv(long value, int distance) {
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_reg_v_csubv_0(long value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_subv_csubv_0(long, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_subv_csubv_0(long value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_csubv_v(int value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (distance = x - y)
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK:          Sub
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_csubv_subv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (j >>> (#reg_bits - distance)) | (j << distance)
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_csubv_v(long value, int distance) {
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long rol_long_reg_csubv_v_0(long value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (i >>> distance) | (i << -distance) (i.e. libcore's Integer.rotateRight)
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static int ror_int_reg_v_negv(int value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Neg>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Ror>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Sub>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_reg_v_negv_env(int value, int distance) {
+    int neg_distance = -distance;
+    return ((value >>> distance) | (value << neg_distance)) + neg_distance;
+  }
+
+  //  (j >>> distance) | (j << -distance) (i.e. libcore's Long.rotateRight)
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static long ror_long_reg_v_negv(long value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
+
+  //  (i << distance) | (i >>> -distance) (i.e. libcore's Integer.rotateLeft)
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_negv_v(int value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) | (j >>> -distance) (i.e. libcore's Long.rotateLeft)
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_negv_v(long value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) + (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Add:j\d+>>          Add [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Add
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_add(long value, int distance) {
+    return (value << distance) + (value >>> -distance);
+  }
+
+  //  (j << distance) ^ (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Xor:j\d+>>          Xor [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Xor>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Xor
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_xor(long value, int distance) {
+    return (value << distance) ^ (value >>> -distance);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(2, ror_int_constant_c_c(8));
+    assertIntEquals(2, ror_int_constant_c_c_0(8));
+    assertLongEquals(2L, ror_long_constant_c_c(8L));
+
+    assertIntEquals(2, ror_int_constant_c_negc(8));
+    assertLongEquals(2L, ror_long_constant_c_negc(8L));
+
+    assertIntEquals(2, ror_int_reg_v_csubv(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_csubv(8L, 2));
+
+    assertIntEquals(2, ror_int_subv_csubv(8, 2, 0));
+    assertIntEquals(32, ror_int_subv_csubv_env(8, 2, 0));
+    assertIntEquals(32, rol_int_csubv_subv(8, 2, 0));
+
+    assertIntEquals(32, rol_int_reg_csubv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_csubv_v(8L, 2));
+
+    assertIntEquals(2, ror_int_reg_v_negv(8, 2));
+    assertIntEquals(0, ror_int_reg_v_negv_env(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_negv(8L, 2));
+
+    assertIntEquals(32, rol_int_reg_negv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_negv_v(8L, 2));
+
+    assertLongEquals(32L, rol_long_reg_v_negv_add(8L, 2));
+    assertLongEquals(32L, rol_long_reg_v_negv_xor(8L, 2));
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index ebefeea..27f5b5d 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -48,4 +48,5 @@
 b/23502994 (check-cast)
 b/25494456
 b/21869691
+b/26143249
 Done!
diff --git a/test/800-smali/smali/b_26143249.smali b/test/800-smali/smali/b_26143249.smali
new file mode 100644
index 0000000..aa69e84
--- /dev/null
+++ b/test/800-smali/smali/b_26143249.smali
@@ -0,0 +1,20 @@
+# Make sure we accept non-abstract classes with abstract members.
+
+.class public LB26143249;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+    .registers 1
+    invoke-virtual {p0}, LB26143249;->abs()V
+    return-void
+.end method
+
+.method public abstract abs()V
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 3b62a46..cc3b0b4 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -141,6 +141,8 @@
                 null));
         testCases.add(new TestCase("b/21869691", "B21869691A", "run", null,
                 new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/26143249", "B26143249", "run", null,
+                new AbstractMethodError(), null));
     }
 
     public void runTests() {
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index f74a516..f84dfe6 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -38,7 +38,8 @@
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
-  543-env-long-ref/env_long_ref.cc
+  543-env-long-ref/env_long_ref.cc \
+  554-jit-profile-file/offline_profile.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 0925d36..54ceb75 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -276,7 +276,8 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -298,6 +299,7 @@
   412-new-array \
   471-uninitialized-locals \
   506-verify-aput \
+  554-jit-profile-file \
   800-smali
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
@@ -356,13 +358,15 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
   137-cfi \
-  138-duplicate-classes-check2
+  138-duplicate-classes-check2 \
+  554-jit-profile-file
 
 # This test fails without an image.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
@@ -413,7 +417,8 @@
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -454,6 +459,7 @@
     441-checker-inliner \
     510-checker-try-catch \
     536-checker-intrinsic-optimization \
+    557-checker-instruction-simplifier-ror \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -466,6 +472,21 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
+# Known broken tests for the mips64 optimizing compiler backend.
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
+    557-checker-instruction-simplifier-ror \
+
+ifeq (mips64,$(TARGET_ARCH))
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+endif
+
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS :=
+
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
@@ -493,12 +514,17 @@
 # Tests that should fail in the read barrier configuration.
 # 055: Exceeds run time limits due to read barrier instrumentation.
 # 137: Read barrier forces interpreter. Cannot run this with the interpreter.
+# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
+#      more parallel moves (at least on x86), thus some Checker assertions may fail.
 # 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
 #      handled in the read barrier configuration.
+# 554: Cannot run in interpreter mode and this rule covers both: the compiler and the interpreter.
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
   055-enum-performance                    \
   137-cfi                                 \
-  537-checker-arraycopy
+  484-checker-register-hints              \
+  537-checker-arraycopy                   \
+  554-jit-profile-file
 
 ifeq ($(ART_USE_READ_BARRIER),true)
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
@@ -520,7 +546,8 @@
 # Tests that should fail in the heap poisoning configuration with the interpreter.
 # 137: Cannot run this with the interpreter.
 TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 
 ifeq ($(ART_HEAP_POISONING),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))
diff --git a/test/dexdump/bytecodes.txt b/test/dexdump/bytecodes.txt
index d14c47c..4c8b79b 100755
--- a/test/dexdump/bytecodes.txt
+++ b/test/dexdump/bytecodes.txt
@@ -196,6 +196,7 @@
       name          : 'icon'
       type          : 'I'
       access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 2130837504
   Instance fields   -
   Direct methods    -
     #0              : (in Lcom/google/android/test/R$drawable;)
diff --git a/test/dexdump/bytecodes.xml b/test/dexdump/bytecodes.xml
index 0581677..d08c2e9 100755
--- a/test/dexdump/bytecodes.xml
+++ b/test/dexdump/bytecodes.xml
@@ -3,6 +3,7 @@
 >
 <class name="SuppressLint"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -23,6 +24,7 @@
 </class>
 <class name="TargetApi"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -46,6 +48,7 @@
 >
 <class name="BuildConfig"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -70,6 +73,7 @@
 </class>
 <class name="R.attr"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -85,6 +89,7 @@
 </class>
 <class name="R.drawable"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -97,6 +102,7 @@
  static="true"
  final="true"
  visibility="public"
+ value="2130837504"
 >
 </field>
 <constructor name="R.drawable"
@@ -109,6 +115,7 @@
 </class>
 <class name="R"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -124,6 +131,7 @@
 </class>
 <class name="Test"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
diff --git a/test/dexdump/checkers.xml b/test/dexdump/checkers.xml
index 232254f..4e56ea2 100755
--- a/test/dexdump/checkers.xml
+++ b/test/dexdump/checkers.xml
@@ -3,6 +3,7 @@
 >
 <class name="Checkers"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -112,6 +113,7 @@
 </class>
 <class name="CheckersView"
  extends="android.view.View"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -331,6 +333,7 @@
 </class>
 <class name="a"
  extends="java.lang.Thread"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -500,6 +503,7 @@
 </class>
 <class name="g"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
diff --git a/test/dexdump/staticfields.dex b/test/dexdump/staticfields.dex
new file mode 100644
index 0000000..a07c46e
--- /dev/null
+++ b/test/dexdump/staticfields.dex
Binary files differ
diff --git a/test/dexdump/staticfields.lst b/test/dexdump/staticfields.lst
new file mode 100644
index 0000000..5375b8e
--- /dev/null
+++ b/test/dexdump/staticfields.lst
@@ -0,0 +1,2 @@
+#staticfields.dex
+0x000001bc 8 StaticFields <init> ()V StaticFields.java 24
diff --git a/test/dexdump/staticfields.txt b/test/dexdump/staticfields.txt
new file mode 100644
index 0000000..022605f
--- /dev/null
+++ b/test/dexdump/staticfields.txt
@@ -0,0 +1,126 @@
+Processing 'staticfields.dex'...
+Opened 'staticfields.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : 52d4fc6d
+signature           : 6e82...2f27
+file_size           : 1264
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 28
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 12
+type_ids_off        : 224 (0x0000e0)
+proto_ids_size       : 1
+proto_ids_off        : 272 (0x000110)
+field_ids_size      : 12
+field_ids_off       : 284 (0x00011c)
+method_ids_size     : 2
+method_ids_off      : 380 (0x00017c)
+class_defs_size     : 1
+class_defs_off      : 396 (0x00018c)
+data_size           : 836
+data_off            : 428 (0x0001ac)
+
+Class #0 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 7
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 11
+annotations_off     : 0 (0x000000)
+class_data_off      : 1067 (0x00042b)
+static_fields_size  : 12
+instance_fields_size: 0
+direct_methods_size : 1
+virtual_methods_size: 0
+
+Class #0            -
+  Class descriptor  : 'LStaticFields;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LStaticFields;)
+      name          : 'test00_public_static_final_byte_42'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 42
+    #1              : (in LStaticFields;)
+      name          : 'test01_public_static_final_short_43'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 43
+    #2              : (in LStaticFields;)
+      name          : 'test02_public_static_final_char_X'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 88
+    #3              : (in LStaticFields;)
+      name          : 'test03_public_static_final_int_44'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 44
+    #4              : (in LStaticFields;)
+      name          : 'test04_public_static_final_long_45'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 45
+    #5              : (in LStaticFields;)
+      name          : 'test05_public_static_final_float_46_47'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 46.470001
+    #6              : (in LStaticFields;)
+      name          : 'test06_public_static_final_double_48_49'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 48.490000
+    #7              : (in LStaticFields;)
+      name          : 'test07_public_static_final_string'
+      type          : 'Ljava/lang/String;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : "abc \\><\"'&\t\r\n"
+    #8              : (in LStaticFields;)
+      name          : 'test08_public_static_final_object_null'
+      type          : 'Ljava/lang/Object;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : null
+    #9              : (in LStaticFields;)
+      name          : 'test09_public_static_final_boolean_true'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : true
+    #10              : (in LStaticFields;)
+      name          : 'test10_private_static_final_int_50'
+      type          : 'I'
+      access        : 0x001a (PRIVATE STATIC FINAL)
+      value         : 50
+    #11              : (in LStaticFields;)
+      name          : 'test99_empty_value'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LStaticFields;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0001ac:                                        |[0001ac] StaticFields.<init>:()V
+0001bc: 7010 0100 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0001
+0001c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=24
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LStaticFields; 
+
+  Virtual methods   -
+  source_file_idx   : 11 (StaticFields.java)
+
diff --git a/test/dexdump/staticfields.xml b/test/dexdump/staticfields.xml
new file mode 100644
index 0000000..c906f0a
--- /dev/null
+++ b/test/dexdump/staticfields.xml
@@ -0,0 +1,130 @@
+<api>
+<package name=""
+>
+<class name="StaticFields"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<field name="test00_public_static_final_byte_42"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="42"
+>
+</field>
+<field name="test01_public_static_final_short_43"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="43"
+>
+</field>
+<field name="test02_public_static_final_char_X"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="88"
+>
+</field>
+<field name="test03_public_static_final_int_44"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="44"
+>
+</field>
+<field name="test04_public_static_final_long_45"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="45"
+>
+</field>
+<field name="test05_public_static_final_float_46_47"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="46.470001"
+>
+</field>
+<field name="test06_public_static_final_double_48_49"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="48.490000"
+>
+</field>
+<field name="test07_public_static_final_string"
+ type="java.lang.String"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="abc \>&lt;&quot;'&amp;&#x9;&#xD;&#xA;"
+>
+</field>
+<field name="test08_public_static_final_object_null"
+ type="java.lang.Object"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="null"
+>
+</field>
+<field name="test09_public_static_final_boolean_true"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="true"
+>
+</field>
+<field name="test99_empty_value"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+>
+</field>
+<constructor name="StaticFields"
+ type="StaticFields"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+</class>
+</package>
+</api>