Merge "Honor --force-determinism for vdex."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 11af1c0..0a465c4 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -83,16 +83,16 @@
 ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
 
 $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
@@ -171,6 +171,12 @@
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_DEFAULT_64) $(HOST_CORE_IMAGE_DEFAULT_32)
 
+# The dexdiag test requires the dexdiag utility.
+ART_GTEST_dexdiag_test_HOST_DEPS := \
+  $(HOST_OUT_EXECUTABLES)/dexdiag
+ART_GTEST_dexdiag_test_TARGET_DEPS := \
+  dexdiag
+
 # The dexdump test requires an image and the dexdump utility.
 # TODO: rename into dexdump when migration completes
 ART_GTEST_dexdump_test_HOST_DEPS := \
@@ -227,6 +233,8 @@
   $(TARGET_CORE_IMAGE_DEFAULT_64) \
   $(TARGET_CORE_IMAGE_DEFAULT_32) \
   oatdump
+ART_GTEST_oatdump_image_test_HOST_DEPS := $(ART_GTEST_oatdump_test_HOST_DEPS)
+ART_GTEST_oatdump_image_test_TARGET_DEPS := $(ART_GTEST_oatdump_test_TARGET_DEPS)
 
 # Profile assistant tests require the profman utility.
 ART_GTEST_profile_assistant_test_HOST_DEPS := \
@@ -242,6 +250,7 @@
     art_compiler_tests \
     art_compiler_host_tests \
     art_dex2oat_tests \
+    art_dexdiag_tests \
     art_dexdump_tests \
     art_dexlayout_tests \
     art_dexlist_tests \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index c733feb..3f9ea15 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -56,11 +56,11 @@
     core_dex2oat_dependency := $(DEX2OAT)
   endif
   ifeq ($(1),interpreter)
-    core_compile_options += --compiler-filter=interpret-only
+    core_compile_options += --compiler-filter=quicken
     core_infix := -interpreter
   endif
   ifeq ($(1),interp-ac)
-    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_compile_options += --compiler-filter=extract --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
   ifneq ($(filter-out interpreter interp-ac optimizing,$(1)),)
@@ -166,11 +166,11 @@
     core_dex2oat_dependency := $(DEX2OAT)
   endif
   ifeq ($(1),interpreter)
-    core_compile_options += --compiler-filter=interpret-only
+    core_compile_options += --compiler-filter=quicken
     core_infix := -interpreter
   endif
   ifeq ($(1),interp-ac)
-    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_compile_options += --compiler-filter=extract --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
   ifneq ($(filter-out interpreter interp-ac optimizing,$(1)),)
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index d82fd48..32480dd 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -612,7 +612,7 @@
 template <typename TVariantMap,
           template <typename TKeyValue> class TVariantMapKey>
 template <typename TArg>
-CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>
+typename CmdlineParser<TVariantMap, TVariantMapKey>::template ArgumentBuilder<TArg>
 CmdlineParser<TVariantMap, TVariantMapKey>::CreateArgumentBuilder(
     CmdlineParser<TVariantMap, TVariantMapKey>::Builder& parent) {
   return CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>(
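
The hunk above fixes a C++ dependent-name issue: when an out-of-line definition of a class template's member names a nested member template in its return type, both `typename` and `template` are required. A minimal standalone sketch of the same rule (names are hypothetical, not the real parser):

    #include <iostream>

    template <typename T>
    struct Parser {
      template <typename TArg>
      struct ArgumentBuilder {
        TArg value;
      };

      template <typename TArg>
      static ArgumentBuilder<TArg> Create(TArg value);
    };

    // Without `typename` (dependent type) and `template` (dependent member
    // template), conforming compilers reject this out-of-line definition.
    template <typename T>
    template <typename TArg>
    typename Parser<T>::template ArgumentBuilder<TArg> Parser<T>::Create(TArg value) {
      return {value};
    }

    int main() {
      std::cout << Parser<int>::Create(42.0).value << "\n";  // prints 42
    }
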
diff --git a/compiler/Android.bp b/compiler/Android.bp
index dec8b57..a2b07af 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -28,7 +28,6 @@
         "compiled_method.cc",
         "debug/elf_debug_writer.cc",
         "dex/dex_to_dex_compiler.cc",
-        "dex/dex_to_dex_decompiler.cc",
         "dex/inline_method_analyser.cc",
         "dex/verified_method.cc",
         "dex/verification_results.cc",
@@ -339,6 +338,7 @@
         "elf_writer_test.cc",
         "exception_test.cc",
         "image_test.cc",
+        "image_write_read_test.cc",
         "jni/jni_compiler_test.cc",
         "linker/multi_oat_relative_patcher_test.cc",
         "linker/output_stream_test.cc",
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index 9a8d27c..e486e2e 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "dex/dex_to_dex_decompiler.h"
+#include "dex_to_dex_decompiler.h"
 
 #include "class_linker.h"
 #include "compiler/common_compiler_test.h"
@@ -38,7 +38,7 @@
     TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_options_->boot_image_ = false;
-    compiler_options_->SetCompilerFilter(CompilerFilter::kInterpretOnly);
+    compiler_options_->SetCompilerFilter(CompilerFilter::kQuicken);
     compiler_driver_->CompileAll(class_loader,
                                  GetDexFiles(class_loader),
                                  /* verifier_deps */ nullptr,
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 00a7d44..3f0df3b 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -104,11 +104,12 @@
   // This method should only be called for classes verified at compile time,
   // which have no verifier errors and no methods that we know will throw
   // at runtime.
-  AtomicMap::InsertResult result = atomic_verified_methods_.Insert(
+  atomic_verified_methods_.Insert(
       ref,
       /*expected*/ nullptr,
       new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false));
-  DCHECK_EQ(result, AtomicMap::kInsertResultSuccess);
+  // We don't check the result of `Insert` as we could insert twice for the same
+  // MethodReference in the presence of duplicate methods.
 }
 
 void VerificationResults::AddRejectedClass(ClassReference ref) {
@@ -126,7 +127,7 @@
 
 bool VerificationResults::IsCandidateForCompilation(MethodReference&,
                                                     const uint32_t access_flags) {
-  if (!compiler_options_->IsBytecodeCompilationEnabled()) {
+  if (!compiler_options_->IsAotCompilationEnabled()) {
     return false;
   }
   // Don't compile class initializers unless kEverything.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f41ccc2..a8ab7c6 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
-#include "dex/dex_to_dex_decompiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_options.h"
@@ -421,7 +420,7 @@
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
-  if (GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+  if (GetCompilerOptions().IsAnyCompilationEnabled()) {
     Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
@@ -431,61 +430,6 @@
   FreeThreadPools();
 }
 
-// In-place unquicken the given `dex_files` based on `quickening_info`.
-static void Unquicken(const std::vector<const DexFile*>& dex_files,
-                      const ArrayRef<const uint8_t>& quickening_info,
-                      bool decompile_return_instruction) {
-  const uint8_t* quickening_info_ptr = quickening_info.data();
-  const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size();
-  for (const DexFile* dex_file : dex_files) {
-    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
-      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-      const uint8_t* class_data = dex_file->GetClassData(class_def);
-      if (class_data == nullptr) {
-        continue;
-      }
-      ClassDataItemIterator it(*dex_file, class_data);
-      // Skip fields
-      while (it.HasNextStaticField()) {
-        it.Next();
-      }
-      while (it.HasNextInstanceField()) {
-        it.Next();
-      }
-
-      while (it.HasNextDirectMethod()) {
-        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
-        if (code_item != nullptr) {
-          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
-          quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(*code_item,
-                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
-                                     decompile_return_instruction);
-          quickening_info_ptr += quickening_size;
-        }
-        it.Next();
-      }
-
-      while (it.HasNextVirtualMethod()) {
-        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
-        if (code_item != nullptr) {
-          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
-          quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(*code_item,
-                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
-                                     decompile_return_instruction);
-          quickening_info_ptr += quickening_size;
-        }
-        it.Next();
-      }
-      DCHECK(!it.HasNext());
-    }
-  }
-  if (quickening_info_ptr != quickening_info_end) {
-    LOG(FATAL) << "Failed to use all quickening info";
-  }
-}
-
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
                                 VdexFile* vdex_file,
@@ -494,15 +438,12 @@
     // TODO: we unquicken unconditionally, as we don't know
     // if the boot image has changed. How exactly we'll know is under
     // experimentation.
-    if (vdex_file->GetQuickeningInfo().size() != 0) {
-      TimingLogger::ScopedTiming t("Unquicken", timings);
-      // We do not decompile a RETURN_VOID_NO_BARRIER into a RETURN_VOID, as the quickening
-      // optimization does not depend on the boot image (the optimization relies on not
-      // having final fields in a class, which does not change for an app).
-      Unquicken(dex_files,
-                vdex_file->GetQuickeningInfo(),
-                /* decompile_return_instruction */ false);
-    }
+    TimingLogger::ScopedTiming t("Unquicken", timings);
+    // We do not decompile a RETURN_VOID_NO_BARRIER into a RETURN_VOID, as the quickening
+    // optimization does not depend on the boot image (the optimization relies on not
+    // having final fields in a class, which does not change for an app).
+    VdexFile::Unquicken(dex_files, vdex_file->GetQuickeningInfo());
+
     Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
         new verifier::VerifierDeps(dex_files, vdex_file->GetVerifierDepsData()));
   }
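
For context on the refactoring above: the deleted Unquicken helper (now living in VdexFile::Unquicken) consumed a flat stream in which each quickened code item contributes a uint32_t payload size followed by that many bytes. A minimal sketch of just that framing, under the same layout assumptions (SplitQuickeningInfo is a hypothetical name):

    #include <cstdint>
    #include <cstring>
    #include <utility>
    #include <vector>

    // Splits a quickening-info stream into (data, size) entries in visit
    // order; mirrors the pointer walk of the removed helper.
    std::vector<std::pair<const uint8_t*, uint32_t>> SplitQuickeningInfo(
        const uint8_t* ptr, const uint8_t* const end) {
      std::vector<std::pair<const uint8_t*, uint32_t>> entries;
      while (ptr < end) {
        uint32_t size;
        std::memcpy(&size, ptr, sizeof(size));  // the original reinterpret_cast assumes alignment
        ptr += sizeof(size);
        entries.emplace_back(ptr, size);
        ptr += size;
      }
      return entries;
    }
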
@@ -514,7 +455,7 @@
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   auto* const runtime = Runtime::Current();
-  DCHECK(driver.GetCompilerOptions().IsAnyMethodCompilationEnabled());
+  DCHECK(driver.GetCompilerOptions().IsQuickeningCompilationEnabled());
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = runtime->GetClassLinker();
   mirror::Class* klass = class_linker->FindClass(self, descriptor, class_loader);
@@ -986,7 +927,8 @@
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
-  if (compiler_options_->IsAnyMethodCompilationEnabled()) {
+  if (compiler_options_->IsAnyCompilationEnabled()) {
+    // Resolve eagerly to prepare for compilation.
     Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
@@ -1014,7 +956,7 @@
                << "situations. Please check the log.";
   }
 
-  if (compiler_options_->IsAnyMethodCompilationEnabled()) {
+  if (compiler_options_->IsAnyCompilationEnabled()) {
     if (kIsDebugBuild) {
       EnsureVerifiedOrVerifyAtRuntime(class_loader, dex_files);
     }
@@ -2017,7 +1959,7 @@
     return false;
   }
 
-  bool compiler_only_verifies = !GetCompilerOptions().IsAnyMethodCompilationEnabled();
+  bool compiler_only_verifies = !GetCompilerOptions().IsAnyCompilationEnabled();
 
   // We successfully validated the dependencies, now update class status
   // of verified classes. Note that the dependencies also record which classes
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 874e357..fbab9df 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -179,6 +179,40 @@
                                      uint16_t class_def_index,
                                      bool requires)
       REQUIRES(!requires_constructor_barrier_lock_);
+
+  // Do the <init> methods for this class require a constructor barrier (prior to the return)?
+  // The answer is "yes", if and only if this class has any instance final fields.
+  // (This must not be called for any non-<init> methods; the answer would be "no").
+  //
+  // ---
+  //
+  // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end
+  // of the invoked constructor. The constructor barrier is a conservative way of enforcing that
+  // these freezes happen-before the object being constructed becomes observable by another
+  // thread.
+  //
+  // Note: This question only makes sense for instance constructors;
+  // static constructors (despite possibly having finals) never need
+  // a barrier.
+  //
+  // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes
+  // class initialization as:
+  //
+  //   lock(class.lock)
+  //     class.state = initializing
+  //   unlock(class.lock)
+  //
+  //   invoke <clinit>
+  //
+  //   lock(class.lock)
+  //     class.state = initialized
+  //   unlock(class.lock)              <-- acts as a release
+  //
+  // The last operation in the above example acts as an atomic release
+  // for any stores in <clinit>, which ends up being stricter
+  // than what a constructor barrier needs.
+  //
+  // See also QuasiAtomic::ThreadFenceForConstructor().
   bool RequiresConstructorBarrier(Thread* self,
                                   const DexFile* dex_file,
                                   uint16_t class_def_index)
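
As a rough C++ analogue of the barrier described in the comment above (a sketch of the publication pattern using standard fences, not ART's QuasiAtomic API):

    #include <atomic>

    struct Box {
      int final_field;
      explicit Box(int v) : final_field(v) {
        // Constructor barrier: the release fence orders the "freeze" of
        // final_field before any subsequent publication of the object.
        std::atomic_thread_fence(std::memory_order_release);
      }
    };

    std::atomic<Box*> shared{nullptr};

    void Publish() { shared.store(new Box(42), std::memory_order_relaxed); }

    int Consume() {
      Box* b = shared.load(std::memory_order_acquire);
      return (b != nullptr) ? b->final_field : 0;  // reads 42 once b is visible
    }
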
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 42ff1e7..17854fd 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -325,7 +325,7 @@
 class CompilerDriverVerifyTest : public CompilerDriverTest {
  protected:
   CompilerFilter::Filter GetCompilerFilter() const OVERRIDE {
-    return CompilerFilter::kVerifyProfile;
+    return CompilerFilter::kVerify;
   }
 
   void CheckVerifiedClass(jobject class_loader, const std::string& clazz) const {
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a0c0a2a..a4e2083 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -200,7 +200,7 @@
     ParseDumpInitFailures(option, Usage);
   } else if (option.starts_with("--dump-cfg=")) {
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-  } else if (option.starts_with("--dump-cfg-append")) {
+  } else if (option == "--dump-cfg-append") {
     dump_cfg_append_ = true;
   } else if (option.starts_with("--register-allocation-strategy=")) {
     ParseRegisterAllocationStrategy(option, Usage);
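
The --dump-cfg-append hunk above replaces prefix matching with exact equality: with starts_with, any argument that merely begins with the option name is silently accepted. A standalone illustration (using the pre-C++20 rfind idiom for starts_with):

    #include <cassert>
    #include <string>

    bool MatchesPrefix(const std::string& opt) {
      return opt.rfind("--dump-cfg-append", 0) == 0;  // starts_with
    }

    bool MatchesExact(const std::string& opt) {
      return opt == "--dump-cfg-append";
    }

    int main() {
      assert(MatchesPrefix("--dump-cfg-appendix"));   // mistyped flag accepted
      assert(!MatchesExact("--dump-cfg-appendix"));   // mistyped flag rejected
      assert(MatchesExact("--dump-cfg-append"));
    }
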
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 2376fbf..957ea99 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -84,32 +84,32 @@
     compiler_filter_ = compiler_filter;
   }
 
-  bool VerifyAtRuntime() const {
-    return compiler_filter_ == CompilerFilter::kVerifyAtRuntime;
-  }
-
-  bool IsBytecodeCompilationEnabled() const {
-    return CompilerFilter::IsBytecodeCompilationEnabled(compiler_filter_);
+  bool IsAotCompilationEnabled() const {
+    return CompilerFilter::IsAotCompilationEnabled(compiler_filter_);
   }
 
   bool IsJniCompilationEnabled() const {
     return CompilerFilter::IsJniCompilationEnabled(compiler_filter_);
   }
 
+  bool IsQuickeningCompilationEnabled() const {
+    return CompilerFilter::IsQuickeningCompilationEnabled(compiler_filter_);
+  }
+
   bool IsVerificationEnabled() const {
     return CompilerFilter::IsVerificationEnabled(compiler_filter_);
   }
 
   bool AssumeClassesAreVerified() const {
-    return compiler_filter_ == CompilerFilter::kVerifyNone;
+    return compiler_filter_ == CompilerFilter::kAssumeVerified;
   }
 
-  bool VerifyOnlyProfile() const {
-    return compiler_filter_ == CompilerFilter::kVerifyProfile;
+  bool VerifyAtRuntime() const {
+    return compiler_filter_ == CompilerFilter::kExtract;
   }
 
-  bool IsAnyMethodCompilationEnabled() const {
-    return CompilerFilter::IsAnyMethodCompilationEnabled(compiler_filter_);
+  bool IsAnyCompilationEnabled() const {
+    return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_);
   }
 
   size_t GetHugeMethodThreshold() const {
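
The accessor changes above follow the CompilerFilter renaming that runs through this change; a comment-style summary of the mapping, as evidenced by these hunks:

    // Old CompilerFilter name          New name
    // CompilerFilter::kVerifyNone      CompilerFilter::kAssumeVerified
    // CompilerFilter::kVerifyAtRuntime CompilerFilter::kExtract
    // CompilerFilter::kVerifyProfile   CompilerFilter::kVerify
    // CompilerFilter::kInterpretOnly   CompilerFilter::kQuicken
    // dex2oat flags: interpret-only -> quicken, verify-at-runtime -> extract
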
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e53d8d..9d7aff7 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -14,492 +14,17 @@
  * limitations under the License.
  */
 
-#include "image.h"
-
-#include <memory>
-#include <string>
+#include <string.h>
 #include <vector>
 
-#include "android-base/stringprintf.h"
+#include "image_test.h"
 
-#include "art_method-inl.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker-inl.h"
-#include "compiler_callbacks.h"
-#include "common_compiler_test.h"
-#include "debug/method_debug_info.h"
-#include "dex/quick_compiler_callbacks.h"
-#include "driver/compiler_options.h"
-#include "elf_writer.h"
-#include "elf_writer_quick.h"
-#include "gc/space/image_space.h"
-#include "image_writer.h"
-#include "linker/buffered_output_stream.h"
-#include "linker/file_output_stream.h"
-#include "linker/multi_oat_relative_patcher.h"
-#include "lock_word.h"
-#include "mirror/object-inl.h"
-#include "oat_writer.h"
+#include "image.h"
 #include "scoped_thread_state_change-inl.h"
-#include "signal_catcher.h"
-#include "utils.h"
+#include "thread.h"
 
 namespace art {
 
-static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
-
-struct CompilationHelper {
-  std::vector<std::string> dex_file_locations;
-  std::vector<ScratchFile> image_locations;
-  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
-  std::vector<ScratchFile> image_files;
-  std::vector<ScratchFile> oat_files;
-  std::vector<ScratchFile> vdex_files;
-  std::string image_dir;
-
-  void Compile(CompilerDriver* driver,
-               ImageHeader::StorageMode storage_mode);
-
-  std::vector<size_t> GetImageObjectSectionSizes();
-
-  ~CompilationHelper();
-};
-
-class ImageTest : public CommonCompilerTest {
- protected:
-  virtual void SetUp() {
-    ReserveImageSpace();
-    CommonCompilerTest::SetUp();
-  }
-
-  void TestWriteRead(ImageHeader::StorageMode storage_mode);
-
-  void Compile(ImageHeader::StorageMode storage_mode,
-               CompilationHelper& out_helper,
-               const std::string& extra_dex = "",
-               const std::initializer_list<std::string>& image_classes = {});
-
-  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
-    CommonCompilerTest::SetUpRuntimeOptions(options);
-    callbacks_.reset(new QuickCompilerCallbacks(
-        verification_results_.get(),
-        CompilerCallbacks::CallbackMode::kCompileBootImage));
-    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-  }
-
-  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
-    return new std::unordered_set<std::string>(image_classes_);
-  }
-
-  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    PointerSize pointer_size = class_linker_->GetImagePointerSize();
-    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
-      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
-          origin->GetSignature() == m.GetSignature()) {
-        return &m;
-      }
-    }
-    return nullptr;
-  }
-
- private:
-  std::unordered_set<std::string> image_classes_;
-};
-
-CompilationHelper::~CompilationHelper() {
-  for (ScratchFile& image_file : image_files) {
-    image_file.Unlink();
-  }
-  for (ScratchFile& oat_file : oat_files) {
-    oat_file.Unlink();
-  }
-  for (ScratchFile& vdex_file : vdex_files) {
-    vdex_file.Unlink();
-  }
-  const int rmdir_result = rmdir(image_dir.c_str());
-  CHECK_EQ(0, rmdir_result);
-}
-
-std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
-  std::vector<size_t> ret;
-  for (ScratchFile& image_file : image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    CHECK(file.get() != nullptr);
-    ImageHeader image_header;
-    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    CHECK(image_header.IsValid());
-    ret.push_back(image_header.GetImageSize());
-  }
-  return ret;
-}
-
-void CompilationHelper::Compile(CompilerDriver* driver,
-                                ImageHeader::StorageMode storage_mode) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
-
-  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      // Inject in boot class path so that the compiler driver can see it.
-      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
-    }
-    class_path.push_back(dex_file.get());
-  }
-
-  // Enable write for dex2dex.
-  for (const DexFile* dex_file : class_path) {
-    dex_file_locations.push_back(dex_file->GetLocation());
-    if (dex_file->IsReadOnly()) {
-      dex_file->EnableWrite();
-    }
-  }
-  {
-    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
-    ScratchFile location;
-    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
-      std::string cur_location =
-          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
-      image_locations.push_back(ScratchFile(cur_location));
-    }
-  }
-  std::vector<std::string> image_filenames;
-  for (ScratchFile& file : image_locations) {
-    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
-    image_filenames.push_back(image_filename);
-    size_t pos = image_filename.rfind('/');
-    CHECK_NE(pos, std::string::npos) << image_filename;
-    if (image_dir.empty()) {
-      image_dir = image_filename.substr(0, pos);
-      int mkdir_result = mkdir(image_dir.c_str(), 0700);
-      CHECK_EQ(0, mkdir_result) << image_dir;
-    }
-    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
-  }
-
-  std::vector<std::string> oat_filenames;
-  std::vector<std::string> vdex_filenames;
-  for (const std::string& image_filename : image_filenames) {
-    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
-    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
-    oat_filenames.push_back(oat_filename);
-    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
-    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
-    vdex_filenames.push_back(vdex_filename);
-  }
-
-  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
-  std::vector<const char*> oat_filename_vector;
-  for (const std::string& file : oat_filenames) {
-    oat_filename_vector.push_back(file.c_str());
-  }
-  std::vector<const char*> image_filename_vector;
-  for (const std::string& file : image_filenames) {
-    image_filename_vector.push_back(file.c_str());
-  }
-  size_t image_idx = 0;
-  for (const DexFile* dex_file : class_path) {
-    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
-    ++image_idx;
-  }
-  // TODO: compile_pic should be a test argument.
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
-                                                      kRequestedImageBase,
-                                                      /*compile_pic*/false,
-                                                      /*compile_app_image*/false,
-                                                      storage_mode,
-                                                      oat_filename_vector,
-                                                      dex_file_to_oat_index_map));
-  {
-    {
-      jobject class_loader = nullptr;
-      TimingLogger timings("ImageTest::WriteRead", false, false);
-      TimingLogger::ScopedTiming t("CompileAll", &timings);
-      driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
-
-      t.NewTiming("WriteElf");
-      SafeMap<std::string, std::string> key_value_store;
-      std::vector<const char*> dex_filename_vector;
-      for (size_t i = 0; i < class_path.size(); ++i) {
-        dex_filename_vector.push_back("");
-      }
-      key_value_store.Put(OatHeader::kBootClassPathKey,
-                          gc::space::ImageSpace::GetMultiImageBootClassPath(
-                              dex_filename_vector,
-                              oat_filename_vector,
-                              image_filename_vector));
-
-      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
-      std::vector<std::unique_ptr<OatWriter>> oat_writers;
-      for (ScratchFile& oat_file : oat_files) {
-        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
-                                                      driver->GetInstructionSetFeatures(),
-                                                      &driver->GetCompilerOptions(),
-                                                      oat_file.GetFile()));
-        elf_writers.back()->Start();
-        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
-                                               &timings,
-                                               /*profile_compilation_info*/nullptr));
-      }
-
-      std::vector<OutputStream*> rodata;
-      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
-      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
-      // Now that we have finalized key_value_store_, start writing the oat file.
-      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
-        const DexFile* dex_file = class_path[i];
-        rodata.push_back(elf_writers[i]->StartRoData());
-        ArrayRef<const uint8_t> raw_dex_file(
-            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
-            dex_file->GetHeader().file_size_);
-        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
-                                            dex_file->GetLocation().c_str(),
-                                            dex_file->GetLocationChecksum());
-
-        std::unique_ptr<MemMap> cur_opened_dex_files_map;
-        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
-        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
-            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
-            rodata.back(),
-            driver->GetInstructionSet(),
-            driver->GetInstructionSetFeatures(),
-            &key_value_store,
-            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
-            /* update_input_vdex */ false,
-            &cur_opened_dex_files_map,
-            &cur_opened_dex_files);
-        ASSERT_TRUE(dex_files_ok);
-
-        if (cur_opened_dex_files_map != nullptr) {
-          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
-          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
-            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
-            opened_dex_files.push_back(std::move(cur_dex_file));
-          }
-        } else {
-          ASSERT_TRUE(cur_opened_dex_files.empty());
-        }
-      }
-      bool image_space_ok = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(image_space_ok);
-
-      if (kIsVdexEnabled) {
-        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-          std::unique_ptr<BufferedOutputStream> vdex_out(
-              MakeUnique<BufferedOutputStream>(
-                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
-          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
-          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
-        }
-      }
-
-      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
-        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
-                                                driver->GetInstructionSetFeatures());
-        OatWriter* const oat_writer = oat_writers[i].get();
-        ElfWriter* const elf_writer = elf_writers[i].get();
-        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
-        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
-        oat_writer->PrepareLayout(&patcher);
-        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-        size_t text_size = oat_writer->GetOatSize() - rodata_size;
-        elf_writer->PrepareDynamicSection(rodata_size,
-                                          text_size,
-                                          oat_writer->GetBssSize(),
-                                          oat_writer->GetBssRootsOffset());
-
-        writer->UpdateOatFileLayout(i,
-                                    elf_writer->GetLoadedSize(),
-                                    oat_writer->GetOatDataOffset(),
-                                    oat_writer->GetOatSize());
-
-        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
-        ASSERT_TRUE(rodata_ok);
-        elf_writer->EndRoData(rodata[i]);
-
-        OutputStream* text = elf_writer->StartText();
-        bool text_ok = oat_writer->WriteCode(text);
-        ASSERT_TRUE(text_ok);
-        elf_writer->EndText(text);
-
-        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
-        ASSERT_TRUE(header_ok);
-
-        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
-
-        elf_writer->WriteDynamicSection();
-        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-
-        bool success = elf_writer->End();
-        ASSERT_TRUE(success);
-      }
-    }
-
-    bool success_image = writer->Write(kInvalidFd,
-                                       image_filename_vector,
-                                       oat_filename_vector);
-    ASSERT_TRUE(success_image);
-
-    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
-      const char* oat_filename = oat_filenames[i].c_str();
-      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
-      ASSERT_TRUE(oat_file != nullptr);
-      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
-                                            writer->GetOatDataBegin(i));
-      ASSERT_TRUE(success_fixup);
-      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
-                                                  << oat_filename;
-    }
-  }
-}
-
-void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
-                        CompilationHelper& helper,
-                        const std::string& extra_dex,
-                        const std::initializer_list<std::string>& image_classes) {
-  for (const std::string& image_class : image_classes) {
-    image_classes_.insert(image_class);
-  }
-  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
-  // Set inline filter values.
-  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
-  image_classes_.clear();
-  if (!extra_dex.empty()) {
-    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
-  }
-  helper.Compile(compiler_driver_.get(), storage_mode);
-  if (image_classes.begin() != image_classes.end()) {
-    // Make sure the class got initialized.
-    ScopedObjectAccess soa(Thread::Current());
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    for (const std::string& image_class : image_classes) {
-      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
-      EXPECT_TRUE(klass != nullptr);
-      EXPECT_TRUE(klass->IsInitialized());
-    }
-  }
-}
-
-void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
-  CompilationHelper helper;
-  Compile(storage_mode, /*out*/ helper);
-  std::vector<uint64_t> image_file_sizes;
-  for (ScratchFile& image_file : helper.image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    ASSERT_TRUE(file.get() != nullptr);
-    ImageHeader image_header;
-    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    ASSERT_TRUE(image_header.IsValid());
-    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
-    ASSERT_NE(0U, bitmap_section.Size());
-
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_TRUE(heap->HaveContinuousSpaces());
-    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
-    ASSERT_FALSE(space->IsImageSpace());
-    ASSERT_TRUE(space != nullptr);
-    ASSERT_TRUE(space->IsMallocSpace());
-    image_file_sizes.push_back(file->GetLength());
-  }
-
-  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
-  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
-
-  // Need to delete the compiler since it has worker threads which are attached to runtime.
-  compiler_driver_.reset();
-
-  // Tear down old runtime before making a new one, clearing out misc state.
-
-  // Remove the memory reservation used to load the image.
-  // Need to do this before we reset the runtime.
-  UnreserveImageSpace();
-
-  helper.extra_dex_files.clear();
-  runtime_.reset();
-  java_lang_dex_file_ = nullptr;
-
-  MemMap::Init();
-
-  RuntimeOptions options;
-  std::string image("-Ximage:");
-  image.append(helper.image_locations[0].GetFilename());
-  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
-  // By default the compiler this creates will not include patch information.
-  options.push_back(std::make_pair("-Xnorelocate", nullptr));
-
-  if (!Runtime::Create(options, false)) {
-    LOG(FATAL) << "Failed to create runtime";
-    return;
-  }
-  runtime_.reset(Runtime::Current());
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more manageable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-  ScopedObjectAccess soa(Thread::Current());
-  ASSERT_TRUE(runtime_.get() != nullptr);
-  class_linker_ = runtime_->GetClassLinker();
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasBootImageSpace());
-  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
-
-  // We loaded the runtime with an explicit image, so it must exist.
-  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
-  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
-    std::unique_ptr<const DexFile> dex(
-        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
-    ASSERT_TRUE(dex != nullptr);
-    uint64_t image_file_size = image_file_sizes[i];
-    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
-    ASSERT_TRUE(image_space != nullptr);
-    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
-      // Uncompressed, image should be smaller than file.
-      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
-    } else if (image_file_size > 16 * KB) {
-      // Compressed, file should be smaller than image. Not really valid for small images.
-      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
-    }
-
-    image_space->VerifyImageAllocations();
-    uint8_t* image_begin = image_space->Begin();
-    uint8_t* image_end = image_space->End();
-    if (i == 0) {
-      // This check is only valid for image 0.
-      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
-    }
-    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
-      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
-      const char* descriptor = dex->GetClassDescriptor(class_def);
-      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
-      EXPECT_TRUE(klass != nullptr) << descriptor;
-      if (image_classes.find(descriptor) == image_classes.end()) {
-        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
-                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
-      } else {
-        // Image classes should be located inside the image.
-        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
-        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
-      }
-      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
-    }
-  }
-}
-
-TEST_F(ImageTest, WriteReadUncompressed) {
-  TestWriteRead(ImageHeader::kStorageModeUncompressed);
-}
-
-TEST_F(ImageTest, WriteReadLZ4) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4);
-}
-
-TEST_F(ImageTest, WriteReadLZ4HC) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
-}
-
 TEST_F(ImageTest, TestImageLayout) {
   std::vector<size_t> image_sizes;
   std::vector<size_t> image_sizes_extra;
diff --git a/compiler/image_test.h b/compiler/image_test.h
new file mode 100644
index 0000000..2f15ff4
--- /dev/null
+++ b/compiler/image_test.h
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_IMAGE_TEST_H_
+#define ART_COMPILER_IMAGE_TEST_H_
+
+#include "image.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "android-base/stringprintf.h"
+
+#include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker-inl.h"
+#include "compiler_callbacks.h"
+#include "common_compiler_test.h"
+#include "debug/method_debug_info.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "driver/compiler_options.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
+#include "gc/space/image_space.h"
+#include "image_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change-inl.h"
+#include "signal_catcher.h"
+#include "utils.h"
+
+namespace art {
+
+static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
+
+struct CompilationHelper {
+  std::vector<std::string> dex_file_locations;
+  std::vector<ScratchFile> image_locations;
+  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
+  std::vector<ScratchFile> image_files;
+  std::vector<ScratchFile> oat_files;
+  std::vector<ScratchFile> vdex_files;
+  std::string image_dir;
+
+  void Compile(CompilerDriver* driver,
+               ImageHeader::StorageMode storage_mode);
+
+  std::vector<size_t> GetImageObjectSectionSizes();
+
+  ~CompilationHelper();
+};
+
+class ImageTest : public CommonCompilerTest {
+ protected:
+  virtual void SetUp() {
+    ReserveImageSpace();
+    CommonCompilerTest::SetUp();
+  }
+
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
+
+  void Compile(ImageHeader::StorageMode storage_mode,
+               CompilationHelper& out_helper,
+               const std::string& extra_dex = "",
+               const std::initializer_list<std::string>& image_classes = {});
+
+  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+    CommonCompilerTest::SetUpRuntimeOptions(options);
+    callbacks_.reset(new QuickCompilerCallbacks(
+        verification_results_.get(),
+        CompilerCallbacks::CallbackMode::kCompileBootImage));
+    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+  }
+
+  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
+    return new std::unordered_set<std::string>(image_classes_);
+  }
+
+  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+          origin->GetSignature() == m.GetSignature()) {
+        return &m;
+      }
+    }
+    return nullptr;
+  }
+
+ private:
+  std::unordered_set<std::string> image_classes_;
+};
+
+inline CompilationHelper::~CompilationHelper() {
+  for (ScratchFile& image_file : image_files) {
+    image_file.Unlink();
+  }
+  for (ScratchFile& oat_file : oat_files) {
+    oat_file.Unlink();
+  }
+  for (ScratchFile& vdex_file : vdex_files) {
+    vdex_file.Unlink();
+  }
+  const int rmdir_result = rmdir(image_dir.c_str());
+  CHECK_EQ(0, rmdir_result);
+}
+
+inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
+  std::vector<size_t> ret;
+  for (ScratchFile& image_file : image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    CHECK(file.get() != nullptr);
+    ImageHeader image_header;
+    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    CHECK(image_header.IsValid());
+    ret.push_back(image_header.GetImageSize());
+  }
+  return ret;
+}
+
+inline void CompilationHelper::Compile(CompilerDriver* driver,
+                                       ImageHeader::StorageMode storage_mode) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
+
+  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      // Inject in boot class path so that the compiler driver can see it.
+      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
+    }
+    class_path.push_back(dex_file.get());
+  }
+
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_path) {
+    dex_file_locations.push_back(dex_file->GetLocation());
+    if (dex_file->IsReadOnly()) {
+      dex_file->EnableWrite();
+    }
+  }
+  {
+    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
+    ScratchFile location;
+    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
+      std::string cur_location =
+          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
+      image_locations.push_back(ScratchFile(cur_location));
+    }
+  }
+  std::vector<std::string> image_filenames;
+  for (ScratchFile& file : image_locations) {
+    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
+    image_filenames.push_back(image_filename);
+    size_t pos = image_filename.rfind('/');
+    CHECK_NE(pos, std::string::npos) << image_filename;
+    if (image_dir.empty()) {
+      image_dir = image_filename.substr(0, pos);
+      int mkdir_result = mkdir(image_dir.c_str(), 0700);
+      CHECK_EQ(0, mkdir_result) << image_dir;
+    }
+    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
+  }
+
+  std::vector<std::string> oat_filenames;
+  std::vector<std::string> vdex_filenames;
+  for (const std::string& image_filename : image_filenames) {
+    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
+    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
+    oat_filenames.push_back(oat_filename);
+    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
+    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
+    vdex_filenames.push_back(vdex_filename);
+  }
+
+  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
+  std::vector<const char*> oat_filename_vector;
+  for (const std::string& file : oat_filenames) {
+    oat_filename_vector.push_back(file.c_str());
+  }
+  std::vector<const char*> image_filename_vector;
+  for (const std::string& file : image_filenames) {
+    image_filename_vector.push_back(file.c_str());
+  }
+  size_t image_idx = 0;
+  for (const DexFile* dex_file : class_path) {
+    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
+    ++image_idx;
+  }
+  // TODO: compile_pic should be a test argument.
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
+                                                      kRequestedImageBase,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false,
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_index_map));
+  {
+    {
+      jobject class_loader = nullptr;
+      TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger::ScopedTiming t("CompileAll", &timings);
+      driver->SetDexFilesForOatFile(class_path);
+      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
+
+      t.NewTiming("WriteElf");
+      SafeMap<std::string, std::string> key_value_store;
+      std::vector<const char*> dex_filename_vector;
+      for (size_t i = 0; i < class_path.size(); ++i) {
+        dex_filename_vector.push_back("");
+      }
+      key_value_store.Put(OatHeader::kBootClassPathKey,
+                          gc::space::ImageSpace::GetMultiImageBootClassPath(
+                              dex_filename_vector,
+                              oat_filename_vector,
+                              image_filename_vector));
+
+      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
+      std::vector<std::unique_ptr<OatWriter>> oat_writers;
+      for (ScratchFile& oat_file : oat_files) {
+        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
+                                                      driver->GetInstructionSetFeatures(),
+                                                      &driver->GetCompilerOptions(),
+                                                      oat_file.GetFile()));
+        elf_writers.back()->Start();
+        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
+                                               &timings,
+                                               /*profile_compilation_info*/nullptr));
+      }
+
+      std::vector<OutputStream*> rodata;
+      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      // Now that we have finalized key_value_store_, start writing the oat file.
+      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
+        const DexFile* dex_file = class_path[i];
+        rodata.push_back(elf_writers[i]->StartRoData());
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
+                                            dex_file->GetLocation().c_str(),
+                                            dex_file->GetLocationChecksum());
+
+        std::unique_ptr<MemMap> cur_opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
+        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
+            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
+            rodata.back(),
+            driver->GetInstructionSet(),
+            driver->GetInstructionSetFeatures(),
+            &key_value_store,
+            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
+            /* update_input_vdex */ false,
+            &cur_opened_dex_files_map,
+            &cur_opened_dex_files);
+        ASSERT_TRUE(dex_files_ok);
+
+        if (cur_opened_dex_files_map != nullptr) {
+          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
+            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
+            opened_dex_files.push_back(std::move(cur_dex_file));
+          }
+        } else {
+          ASSERT_TRUE(cur_opened_dex_files.empty());
+        }
+      }
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
+      if (kIsVdexEnabled) {
+        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
+          std::unique_ptr<BufferedOutputStream> vdex_out(
+              MakeUnique<BufferedOutputStream>(
+                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
+          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
+        }
+      }
+
+      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
+        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
+                                                driver->GetInstructionSetFeatures());
+        OatWriter* const oat_writer = oat_writers[i].get();
+        ElfWriter* const elf_writer = elf_writers[i].get();
+        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
+        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+        oat_writer->PrepareLayout(&patcher);
+        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+        size_t text_size = oat_writer->GetOatSize() - rodata_size;
+        elf_writer->PrepareDynamicSection(rodata_size,
+                                          text_size,
+                                          oat_writer->GetBssSize(),
+                                          oat_writer->GetBssRootsOffset());
+
+        writer->UpdateOatFileLayout(i,
+                                    elf_writer->GetLoadedSize(),
+                                    oat_writer->GetOatDataOffset(),
+                                    oat_writer->GetOatSize());
+
+        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
+        ASSERT_TRUE(rodata_ok);
+        elf_writer->EndRoData(rodata[i]);
+
+        OutputStream* text = elf_writer->StartText();
+        bool text_ok = oat_writer->WriteCode(text);
+        ASSERT_TRUE(text_ok);
+        elf_writer->EndText(text);
+
+        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+        ASSERT_TRUE(header_ok);
+
+        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
+
+        elf_writer->WriteDynamicSection();
+        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+
+        bool success = elf_writer->End();
+        ASSERT_TRUE(success);
+      }
+    }
+
+    bool success_image = writer->Write(kInvalidFd,
+                                       image_filename_vector,
+                                       oat_filename_vector);
+    ASSERT_TRUE(success_image);
+
+    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
+      const char* oat_filename = oat_filenames[i].c_str();
+      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+      ASSERT_TRUE(oat_file != nullptr);
+      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
+                                            writer->GetOatDataBegin(i));
+      ASSERT_TRUE(success_fixup);
+      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
+                                                  << oat_filename;
+    }
+  }
+}
+
+inline void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
+                               CompilationHelper& helper,
+                               const std::string& extra_dex,
+                               const std::initializer_list<std::string>& image_classes) {
+  for (const std::string& image_class : image_classes) {
+    image_classes_.insert(image_class);
+  }
+  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+  // Set inline filter values.
+  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+  image_classes_.clear();
+  if (!extra_dex.empty()) {
+    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
+  }
+  helper.Compile(compiler_driver_.get(), storage_mode);
+  if (image_classes.begin() != image_classes.end()) {
+    // Make sure the class got initialized.
+    ScopedObjectAccess soa(Thread::Current());
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    for (const std::string& image_class : image_classes) {
+      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+      EXPECT_TRUE(klass != nullptr);
+      EXPECT_TRUE(klass->IsInitialized());
+    }
+  }
+}
+
+inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  CompilationHelper helper;
+  Compile(storage_mode, /*out*/ helper);
+  std::vector<uint64_t> image_file_sizes;
+  for (ScratchFile& image_file : helper.image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    ASSERT_TRUE(file.get() != nullptr);
+    ImageHeader image_header;
+    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    ASSERT_TRUE(image_header.IsValid());
+    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
+    ASSERT_NE(0U, bitmap_section.Size());
+
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    ASSERT_TRUE(heap->HaveContinuousSpaces());
+    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
+    ASSERT_FALSE(space->IsImageSpace());
+    ASSERT_TRUE(space != nullptr);
+    ASSERT_TRUE(space->IsMallocSpace());
+    image_file_sizes.push_back(file->GetLength());
+  }
+
+  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
+  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
+
+  // Need to delete the compiler since it has worker threads which are attached to the runtime.
+  compiler_driver_.reset();
+
+  // Tear down old runtime before making a new one, clearing out misc state.
+
+  // Remove the reservation of the memory for use to load the image.
+  // Need to do this before we reset the runtime.
+  UnreserveImageSpace();
+
+  helper.extra_dex_files.clear();
+  runtime_.reset();
+  java_lang_dex_file_ = nullptr;
+
+  MemMap::Init();
+
+  RuntimeOptions options;
+  std::string image("-Ximage:");
+  image.append(helper.image_locations[0].GetFilename());
+  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
+  // By default the compiler this creates will not include patch information.
+  options.push_back(std::make_pair("-Xnorelocate", nullptr));
+
+  if (!Runtime::Create(options, false)) {
+    LOG(FATAL) << "Failed to create runtime";
+    return;
+  }
+  runtime_.reset(Runtime::Current());
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we call
+  // Runtime::Start(); give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  ASSERT_TRUE(runtime_.get() != nullptr);
+  class_linker_ = runtime_->GetClassLinker();
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ASSERT_TRUE(heap->HasBootImageSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
+
+  // We loaded the runtime with an explicit image, so it must exist.
+  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
+  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
+    std::unique_ptr<const DexFile> dex(
+        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
+    ASSERT_TRUE(dex != nullptr);
+    uint64_t image_file_size = image_file_sizes[i];
+    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
+    ASSERT_TRUE(image_space != nullptr);
+    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+      // Uncompressed: the in-memory image should be no larger than the file.
+      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
+    } else if (image_file_size > 16 * KB) {
+      // Compressed: the file should be no larger than the in-memory image. Not really valid
+      // for small images.
+      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
+    }
+
+    image_space->VerifyImageAllocations();
+    uint8_t* image_begin = image_space->Begin();
+    uint8_t* image_end = image_space->End();
+    if (i == 0) {
+      // This check is only valid for image 0.
+      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
+    }
+    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
+      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
+      const char* descriptor = dex->GetClassDescriptor(class_def);
+      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
+      EXPECT_TRUE(klass != nullptr) << descriptor;
+      if (image_classes.find(descriptor) == image_classes.end()) {
+        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
+                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
+      } else {
+        // Image classes should be located inside the image.
+        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
+        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
+      }
+      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
+    }
+  }
+}
+
+
+}  // namespace art
+
+#endif  // ART_COMPILER_IMAGE_TEST_H_
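
A minimal sketch (illustrative, not part of the change) of the size invariant that ImageTest::TestWriteRead asserts: an uncompressed image file is at least as large as the in-memory image it holds, while an LZ4/LZ4HC file should be smaller once the image exceeds the test's 16 KB cut-off.

    // Sketch only; mirrors the storage-mode assertions in TestWriteRead.
    void CheckSizeInvariant(ImageHeader::StorageMode mode,
                            uint64_t file_size,
                            uint64_t image_size) {
      if (mode == ImageHeader::kStorageModeUncompressed) {
        // The file holds the raw image plus the bitmap, so it is never smaller.
        CHECK_LE(image_size, file_size);
      } else if (file_size > 16 * KB) {
        // Compression should win for non-trivial images; tiny ones may expand.
        CHECK_LE(file_size, image_size);
      }
    }
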
diff --git a/compiler/image_write_read_test.cc b/compiler/image_write_read_test.cc
new file mode 100644
index 0000000..32c0b06
--- /dev/null
+++ b/compiler/image_write_read_test.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "image_test.h"
+
+namespace art {
+
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
+TEST_F(ImageTest, WriteReadLZ4HC) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
+}
+
+}  // namespace art
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index f55d5a6..e9d579d 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -249,7 +249,7 @@
       // All remaining method call patches will be handled by this thunk.
       DCHECK(!unprocessed_method_call_patches_.empty());
       DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(),
-                MaxPositiveDisplacement(ThunkType::kMethodCall));
+                MaxPositiveDisplacement(GetMethodCallKey()));
       unprocessed_method_call_patches_.clear();
     }
   }
@@ -271,8 +271,8 @@
   DCHECK(method_call_thunk_ != nullptr);
   // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
   uint32_t displacement = target_offset - patch_offset;
-  uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
-  uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+  uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+  uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
   // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
   if (displacement > max_positive_displacement && displacement < -max_negative_displacement) {
     // Unwritten thunks have higher offsets, check if it's within range.
@@ -299,29 +299,42 @@
   if (data.HasWrittenOffset()) {
     uint32_t offset = data.LastWrittenOffset();
     DCHECK_LT(offset, patch_offset);
-    if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) {
+    if (patch_offset - offset <= MaxNegativeDisplacement(key)) {
       return offset;
     }
   }
   DCHECK(data.HasPendingOffset());
   uint32_t offset = data.GetPendingOffset();
   DCHECK_GT(offset, patch_offset);
-  DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType()));
+  DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key));
   return offset;
 }
 
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() {
+  return ThunkKey(ThunkType::kMethodCall, ThunkParams{{ 0u, 0u }});  // NOLINT(whitespace/braces)
+}
+
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey(
+    const LinkerPatch& patch) {
+  DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
+  ThunkParams params;
+  params.baker_params.custom_value1 = patch.GetBakerCustomValue1();
+  params.baker_params.custom_value2 = patch.GetBakerCustomValue2();
+  ThunkKey key(ThunkType::kBakerReadBarrier, params);
+  return key;
+}
+
 void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method,
                                             uint32_t code_offset) {
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
     uint32_t patch_offset = code_offset + patch.LiteralOffset();
-    ThunkType key_type = static_cast<ThunkType>(-1);
+    ThunkKey key(static_cast<ThunkType>(-1), ThunkParams{{ 0u, 0u }});  // NOLINT(whitespace/braces)
     ThunkData* old_data = nullptr;
     if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
-      key_type = ThunkType::kMethodCall;
+      key = GetMethodCallKey();
       unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod());
       if (method_call_thunk_ == nullptr) {
-        ThunkKey key(key_type, ThunkParams{{ 0u, 0u }});  // NOLINT(whitespace/braces)
-        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
         auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset));
         method_call_thunk_ = &it->second;
         AddUnreservedThunk(method_call_thunk_);
@@ -329,11 +342,10 @@
         old_data = method_call_thunk_;
       }
     } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
-      ThunkKey key = GetBakerReadBarrierKey(patch);
-      key_type = key.GetType();
+      key = GetBakerThunkKey(patch);
       auto lb = thunks_.lower_bound(key);
       if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) {
-        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
         auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset));
         AddUnreservedThunk(&it->second);
       } else {
@@ -342,16 +354,16 @@
     }
     if (old_data != nullptr) {
       // Shared path where an old thunk may need an update.
-      DCHECK(key_type != static_cast<ThunkType>(-1));
+      DCHECK(key.GetType() != static_cast<ThunkType>(-1));
       DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset);
       if (old_data->NeedsNextThunk()) {
         // Patches for a method are ordered by literal offset, so if we still need to place
         // this thunk for a previous patch, that thunk shall be in range for this patch.
-        DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type));
+        DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key));
       } else {
         if (!old_data->HasReservedOffset() ||
-            patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) {
-          old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type));
+            patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) {
+          old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key));
           AddUnreservedThunk(old_data);
         }
       }
@@ -385,8 +397,8 @@
   DCHECK(!unreserved_thunks_.empty());
   DCHECK(!unprocessed_method_call_patches_.empty());
   DCHECK(method_call_thunk_ != nullptr);
-  uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
-  uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+  uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+  uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
   // Process as many patches as possible, stop only on unresolved targets or calls too far back.
   while (!unprocessed_method_call_patches_.empty()) {
     MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod();
@@ -439,8 +451,8 @@
 }
 
 inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset,
-                                                               ThunkType type) {
-  return RoundDown(patch_offset + MaxPositiveDisplacement(type),
+                                                               const ThunkKey& key) {
+  return RoundDown(patch_offset + MaxPositiveDisplacement(key),
                    GetInstructionSetAlignment(instruction_set_));
 }
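
A worked example (illustrative numbers; kArmAlignment is assumed to be the 8-byte Thumb2 code alignment) of the bound computed by CalculateMaxNextOffset above:

    // Sketch: a Thumb2 method-call patch at offset 0x1004.
    uint32_t patch_offset = 0x1004u;
    uint32_t max_disp = (1u << 24) - 2u + 4u;  // kMaxMethodCallPositiveDisplacement.
    uint32_t max_next_offset = RoundDown(patch_offset + max_disp, 8u);
    // 0x1004 + 0x1000002 = 0x1001006, rounded down to 0x1001000; the thunk
    // must start at or before this offset to stay reachable from the BL.
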
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 2cb1b6c..fd204c0 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -42,21 +42,12 @@
 
   enum class ThunkType {
     kMethodCall,              // Method call thunk.
-    kBakerReadBarrierField,   // Baker read barrier, load field or array element at known offset.
-    kBakerReadBarrierRoot,    // Baker read barrier, GC root load.
+    kBakerReadBarrier,        // Baker read barrier.
   };
 
-  struct BakerReadBarrierOffsetParams {
-    uint32_t holder_reg;      // Holder object for reading lock word.
-    uint32_t base_reg;        // Base register, different from holder for large offset.
-                              // If base differs from holder, it should be a pre-defined
-                              // register to limit the number of thunks we need to emit.
-                              // The offset is retrieved using introspection.
-  };
-
-  struct BakerReadBarrierRootParams {
-    uint32_t root_reg;        // The register holding the GC root.
-    uint32_t dummy;
+  struct BakerReadBarrierParams {
+    uint32_t custom_value1;
+    uint32_t custom_value2;
   };
 
   struct RawThunkParams {
@@ -66,8 +57,8 @@
 
   union ThunkParams {
     RawThunkParams raw_params;
-    BakerReadBarrierOffsetParams offset_params;
-    BakerReadBarrierRootParams root_params;
+    BakerReadBarrierParams baker_params;
+    static_assert(sizeof(raw_params) == sizeof(baker_params), "baker_params size check");
   };
 
   class ThunkKey {
@@ -78,14 +69,9 @@
       return type_;
     }
 
-    BakerReadBarrierOffsetParams GetOffsetParams() const {
-      DCHECK(type_ == ThunkType::kBakerReadBarrierField);
-      return params_.offset_params;
-    }
-
-    BakerReadBarrierRootParams GetRootParams() const {
-      DCHECK(type_ == ThunkType::kBakerReadBarrierRoot);
-      return params_.root_params;
+    BakerReadBarrierParams GetBakerReadBarrierParams() const {
+      DCHECK(type_ == ThunkType::kBakerReadBarrier);
+      return params_.baker_params;
     }
 
     RawThunkParams GetRawParams() const {
@@ -110,6 +96,9 @@
     }
   };
 
+  static ThunkKey GetMethodCallKey();
+  static ThunkKey GetBakerThunkKey(const LinkerPatch& patch);
+
   uint32_t ReserveSpaceInternal(uint32_t offset,
                                 const CompiledMethod* compiled_method,
                                 MethodReference method_ref,
@@ -119,10 +108,9 @@
   uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset,
                                            uint32_t target_offset);
 
-  virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0;
   virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0;
-  virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0;
-  virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0;
+  virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0;
+  virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0;
 
  private:
   class ThunkData;
@@ -132,7 +120,7 @@
 
   void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref);
 
-  uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type);
+  uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key);
 
   RelativePatcherTargetProvider* const provider_;
   const InstructionSet instruction_set_;
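
A short usage sketch (illustrative; the map comparator itself is outside this diff) of the new key plumbing: the Thumb2 patcher packs all Baker-specific data into custom_value1, so this base class stays free of per-architecture register details.

    // Sketch: build a Baker field patch and recover its thunk key.
    uint32_t encoded = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
        /* base_reg */ 0u, /* holder_reg */ 0u);
    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(/* literal_offset */ 0u, encoded);
    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
    // key.GetBakerReadBarrierParams().custom_value1 == encoded, and the
    // static_assert in ThunkParams guarantees that the raw_params view used
    // for ordering covers the whole baker_params payload.
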
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 1a5d79c..ced52ff 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -16,9 +16,15 @@
 
 #include "linker/arm/relative_patcher_thumb2.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "art_method.h"
 #include "compiled_method.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "lock_word.h"
+#include "mirror/object.h"
+#include "mirror/array-inl.h"
+#include "read_barrier.h"
+#include "utils/arm/assembler_arm_vixl.h"
 
 namespace art {
 namespace linker {
@@ -32,6 +38,12 @@
 constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
 constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
 
+// Maximum positive and negative displacement for a conditional branch measured from the patch
+// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured
+// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.)
+constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
+constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement;
+
 Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
     : ArmBaseRelativePatcher(provider, kThumb2) {
 }
@@ -84,29 +96,225 @@
   SetInsn32(code, literal_offset, insn);
 }
 
-void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                                        const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                                        uint32_t patch_offset ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "UNIMPLEMENTED";
+void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+                                                        const LinkerPatch& patch,
+                                                        uint32_t patch_offset) {
+  DCHECK_ALIGNED(patch_offset, 2u);
+  uint32_t literal_offset = patch.LiteralOffset();
+  DCHECK_ALIGNED(literal_offset, 2u);
+  DCHECK_LT(literal_offset, code->size());
+  uint32_t insn = GetInsn32(code, literal_offset);
+  DCHECK_EQ(insn, 0xf0408000);  // BNE +0 (unpatched)
+  ThunkKey key = GetBakerThunkKey(patch);
+  if (kIsDebugBuild) {
+    const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1;
+    BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+    // Check that the next instruction matches the expected LDR.
+    switch (kind) {
+      case BakerReadBarrierKind::kField: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+        // LDR (immediate) with correct base_reg.
+        CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+        break;
+      }
+      case BakerReadBarrierKind::kArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+        // LDR (register) with correct base_reg and LSL #2 (LDR Rt, [Rn, Rm, LSL #2]).
+        CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+        CheckValidReg(next_insn & 0xf);  // Check index register
+        break;
+      }
+      case BakerReadBarrierKind::kGcRoot: {
+        DCHECK_GE(literal_offset, 4u);
+        uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+        // LDR (immediate) with correct root_reg.
+        const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+        UNREACHABLE();
+    }
+  }
+  uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+  DCHECK_ALIGNED(target_offset, 4u);
+  uint32_t disp = target_offset - (patch_offset + kPcDisplacement);
+  DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu);   // 21-bit signed.
+  insn |= ((disp << (26 - 20)) & 0x04000000u) |           // Shift bit 20 to 26, "S".
+          ((disp >> (19 - 11)) & 0x00000800u) |           // Shift bit 19 to 11, "J2".
+          ((disp >> (18 - 13)) & 0x00002000u) |           // Shift bit 18 to 13, "J1".
+          ((disp << (16 - 12)) & 0x003f0000u) |           // Shift bits 12-17 to 16-21, "imm6".
+          ((disp >> (1 - 0)) & 0x000007ffu);              // Shift bits 1-11 to 0-10, "imm11".
+  SetInsn32(code, literal_offset, insn);
 }
 
-ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey(
-    const LinkerPatch& patch ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "UNIMPLEMENTED";
-  UNREACHABLE();
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
+                                     vixl::aarch32::Register base_reg,
+                                     vixl::aarch32::MemOperand& lock_word,
+                                     vixl::aarch32::Label* slow_path) {
+  using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+  // Load the lock word containing the rb_state.
+  __ Ldr(ip, lock_word);
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+  __ B(ne, slow_path, /* is_far_target */ false);
+  static_assert(
+      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+      "Field and array LDR offsets must be the same to reuse the same code.");
+  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                "Field LDR must be 1 instruction (4B) before the return address label; "
+                " 2 instructions (8B) for heap poisoning.");
+  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+  __ Bx(lr);          // And return back to the function.
+  // Note: The fake dependency is unnecessary for the slow path.
+}
+
+void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler,
+                                                         uint32_t encoded_data) {
+  using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister)
+      // that performs further checks on the reference and marks it if needed.
+      Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      // If base_reg differs from holder_reg, the offset was too large and we must have
+      // emitted an explicit null check before the load. Otherwise, we need to null-check
+      // the holder as we do not necessarily do that check before going to the thunk.
+      vixl::aarch32::Label throw_npe;
+      if (holder_reg.Is(base_reg)) {
+        __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false);
+      }
+      vixl::aarch32::Label slow_path;
+      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET;
+      MemOperand ldr_half_address(lr, ldr_offset + 2);
+      __ Ldrh(ip, ldr_half_address);          // Load the LDR immediate half-word with "Rt | imm12".
+      __ Ubfx(ip, ip, 0, 12);                 // Extract the offset imm12.
+      __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+      __ Bx(Register(kBakerCcEntrypointRegister));  // Jump to the entrypoint.
+      if (holder_reg.Is(base_reg)) {
+        // Add null check slow path. The stack map is at the address pointed to by LR.
+        __ Bind(&throw_npe);
+        int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
+        __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset));
+        __ Bx(ip);
+      }
+      break;
+    }
+    case BakerReadBarrierKind::kArray: {
+      Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+      MemOperand ldr_address(lr, ldr_offset + 2);
+      __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
+                                              // i.e. Rm+32 because the scale in imm2 is 2.
+      Register ep_reg(kBakerCcEntrypointRegister);  // Insert ip to the entrypoint address to create
+      __ Bfi(ep_reg, ip, 3, 6);               // a switch case target based on the index register.
+      __ Mov(ip, base_reg);                   // Move the base register to ip.
+      __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
+      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+      // and it does not have a forwarding address), call the correct introspection entrypoint;
+      // otherwise return the reference (or the extracted forwarding address).
+      // There is no gray bit check for GC roots.
+      Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label return_label, not_marked, forwarding_address;
+      __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+      MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+      __ Ldr(ip, lock_word);
+      __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+      __ B(eq, &not_marked);
+      __ Bind(&return_label);
+      __ Bx(lr);
+      __ Bind(&not_marked);
+      static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+                    "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+                    " the highest bits and the 'forwarding address' state to have all bits set");
+      __ Cmp(ip, Operand(0xc0000000));
+      __ B(hs, &forwarding_address);
+      // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+      // to art_quick_read_barrier_mark_introspection_gc_roots.
+      Register ep_reg(kBakerCcEntrypointRegister);
+      __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+      __ Mov(ip, root_reg);
+      __ Bx(ep_reg);
+      __ Bind(&forwarding_address);
+      __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+      __ Bx(lr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
 }
 
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
-  DCHECK(key.GetType() == ThunkType::kMethodCall);
-  // The thunk just uses the entry point in the ArtMethod. This works even for calls
-  // to the generic JNI and interpreter trampolines.
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  arm::Thumb2Assembler assembler(&arena);
-  assembler.LoadFromOffset(
-      arm::kLoadWord, arm::PC, arm::R0,
-      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
-  assembler.bkpt(0);
+  arm::ArmVIXLAssembler assembler(&arena);
+
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      assembler.LoadFromOffset(
+          arm::kLoadWord,
+          vixl::aarch32::pc,
+          vixl::aarch32::r0,
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+      __ Bkpt(0);
+      break;
+    case ThunkType::kBakerReadBarrier:
+      CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1);
+      break;
+  }
+
   assembler.FinalizeCode();
   std::vector<uint8_t> thunk_code(assembler.CodeSize());
   MemoryRegion code(thunk_code.data(), thunk_code.size());
@@ -114,14 +322,24 @@
   return thunk_code;
 }
 
-uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
-  DCHECK(type == ThunkType::kMethodCall);
-  return kMaxMethodCallPositiveDisplacement;
+#undef __
+
+uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      return kMaxMethodCallPositiveDisplacement;
+    case ThunkType::kBakerReadBarrier:
+      return kMaxBcondPositiveDisplacement;
+  }
 }
 
-uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
-  DCHECK(type == ThunkType::kMethodCall);
-  return kMaxMethodCallNegativeDisplacement;
+uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      return kMaxMethodCallNegativeDisplacement;
+    case ThunkType::kBakerReadBarrier:
+      return kMaxBcondNegativeDisplacement;
+  }
 }
 
 void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index ab37802..7fad245 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -17,13 +17,46 @@
 #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
 #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
 
+#include "arch/arm/registers_arm.h"
+#include "base/array_ref.h"
+#include "base/bit_field.h"
+#include "base/bit_utils.h"
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
+
+namespace arm {
+class ArmVIXLAssembler;
+}  // namespace arm
+
 namespace linker {
 
 class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
  public:
+  static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
+
+  static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+    CheckValidReg(base_reg);
+    CheckValidReg(holder_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(holder_reg);
+  }
+
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+  }
+
+  static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+    CheckValidReg(root_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+           BakerReadBarrierFirstRegField::Encode(root_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+  }
+
   explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
 
   void PatchCall(std::vector<uint8_t>* code,
@@ -39,12 +72,36 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
-  uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
-  uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+  uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+  uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
 
  private:
+  static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u;
+
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast
+  };
+
+  static constexpr size_t kBitsForBakerReadBarrierKind =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+  static constexpr size_t kBitsForRegister = 4u;
+  using BakerReadBarrierKindField =
+      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+  using BakerReadBarrierFirstRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
+  using BakerReadBarrierSecondRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+
+  static void CheckValidReg(uint32_t reg) {
+    DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister);
+  }
+
+  void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
+
   void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
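
A worked example (illustrative) of the encoded_data layout defined above: kLast == 3 fits in 2 bits, so the kind occupies bits 0-1, the first register bits 2-5 and the second register bits 6-9.

    uint32_t data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
        /* base_reg */ 3u, /* holder_reg */ 5u);
    // kField == 0, so data == (3u << 2) | (5u << 6) == 0x14c. The private
    // BitField typedefs decode it back:
    //   BakerReadBarrierKindField::Decode(data)      -> kField
    //   BakerReadBarrierFirstRegField::Decode(data)  -> 3
    //   BakerReadBarrierSecondRegField::Decode(data) -> 5
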
 
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index f08270d..2e28349 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -14,8 +14,12 @@
  * limitations under the License.
  */
 
+#include "base/casts.h"
 #include "linker/relative_patcher_test.h"
 #include "linker/arm/relative_patcher_thumb2.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+#include "mirror/object.h"
 #include "oat_quick_method_header.h"
 
 namespace art {
@@ -34,13 +38,99 @@
   static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
   static const uint32_t kPcInsnOffset;
 
+  // The PC in Thumb mode is 4 bytes after the instruction location.
+  static constexpr uint32_t kPcAdjustment = 4u;
+
   // Branches within range [-256, 256) can be created from these by adding the low 8 bits.
-  static constexpr uint32_t kBlPlus0 = 0xf000f800;
-  static constexpr uint32_t kBlMinus256 = 0xf7ffff00;
+  static constexpr uint32_t kBlPlus0 = 0xf000f800u;
+  static constexpr uint32_t kBlMinus256 = 0xf7ffff00u;
 
   // Special BL values.
-  static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff;
-  static constexpr uint32_t kBlMinusMax = 0xf400d000;
+  static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu;
+  static constexpr uint32_t kBlMinusMax = 0xf400d000u;
+
+  // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholders for the target
+  // offset.
+  static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+
+  // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
+  static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
+
+  // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract.
+  static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u;
+
+  // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift.
+  static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u;
+
+  // NOP instructions.
+  static constexpr uint32_t kNopInsn = 0xbf00u;
+  static constexpr uint32_t kNopWInsn = 0xf3af8000u;
+
+  void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+    CHECK_LE(pos, code->size());
+    if (IsUint<16>(insn)) {
+      const uint8_t insn_code[] = {
+          static_cast<uint8_t>(insn),
+          static_cast<uint8_t>(insn >> 8),
+      };
+      static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code).");
+      code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+    } else {
+      const uint8_t insn_code[] = {
+          static_cast<uint8_t>(insn >> 16),
+          static_cast<uint8_t>(insn >> 24),
+          static_cast<uint8_t>(insn),
+          static_cast<uint8_t>(insn >> 8),
+      };
+      static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+      code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+    }
+  }
+
+  void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) {
+    InsertInsn(code, code->size(), insn);
+  }
+
+  std::vector<uint8_t> GenNops(size_t num_nops) {
+    std::vector<uint8_t> result;
+    result.reserve(num_nops * 2u);
+    for (size_t i = 0; i != num_nops; ++i) {
+      PushBackInsn(&result, kNopInsn);
+    }
+    return result;
+  }
+
+  std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) {
+    std::vector<uint8_t> raw_code;
+    size_t number_of_16_bit_insns =
+        std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); });
+    raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u);
+    for (uint32_t insn : insns) {
+      PushBackInsn(&raw_code, insn);
+    }
+    return raw_code;
+  }
+
+  uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) {
+    if (!IsAligned<2u>(bne_offset)) {
+      LOG(ERROR) << "Unaligned bne_offset: " << bne_offset;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    if (!IsAligned<2u>(target_offset)) {
+      LOG(ERROR) << "Unaligned target_offset: " << target_offset;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    uint32_t diff = target_offset - bne_offset - kPcAdjustment;
+    DCHECK_ALIGNED(diff, 2u);
+    if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) {
+      LOG(ERROR) << "Target out of range: " << diff;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    return kBneWPlus0 | ((diff >> 1) & 0x7ffu)          // imm11
+                      | (((diff >> 12) & 0x3fu) << 16)  // imm6
+                      | (((diff >> 18) & 1) << 13)      // J1
+                      | (((diff >> 19) & 1) << 11)      // J2
+                      | (((diff >> 20) & 1) << 26);     // S
+  }
 
   bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
                              const ArrayRef<const LinkerPatch>& method1_patches,
@@ -95,9 +185,7 @@
   }
 
   std::vector<uint8_t> CompileMethodCallThunk() {
-    ArmBaseRelativePatcher::ThunkKey key(
-        ArmBaseRelativePatcher::ThunkType::kMethodCall,
-        ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }});  // NOLINT(whitespace/braces)
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
     return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
@@ -125,19 +213,54 @@
     std::vector<uint8_t> result;
     result.reserve(num_nops * 2u + 4u);
     for (size_t i = 0; i != num_nops; ++i) {
-      result.push_back(0x00);
-      result.push_back(0xbf);
+      PushBackInsn(&result, kNopInsn);
     }
-    result.push_back(static_cast<uint8_t>(bl >> 16));
-    result.push_back(static_cast<uint8_t>(bl >> 24));
-    result.push_back(static_cast<uint8_t>(bl));
-    result.push_back(static_cast<uint8_t>(bl >> 8));
+    PushBackInsn(&result, bl);
     return result;
   }
 
   void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
   void TestStringReference(uint32_t string_offset);
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+
+  std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+    const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  uint32_t GetOutputInsn32(uint32_t offset) {
+    CHECK_LE(offset, output_.size());
+    CHECK_GE(output_.size() - offset, 4u);
+    return (static_cast<uint32_t>(output_[offset]) << 16) |
+           (static_cast<uint32_t>(output_[offset + 1]) << 24) |
+           (static_cast<uint32_t>(output_[offset + 2]) << 0) |
+           (static_cast<uint32_t>(output_[offset + 3]) << 8);
+  }
+
+  uint16_t GetOutputInsn16(uint32_t offset) {
+    CHECK_LE(offset, output_.size());
+    CHECK_GE(output_.size() - offset, 2u);
+    return (static_cast<uint32_t>(output_[offset]) << 0) |
+           (static_cast<uint32_t>(output_[offset + 1]) << 8);
+  }
+
+  void TestBakerField(uint32_t offset, uint32_t ref_reg);
 };
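
A small sketch of the byte order the helpers above assume: Thumb2 stores a 32-bit instruction as two little-endian halfwords, high halfword first, which is why InsertInsn and GetOutputInsn32 shuffle bytes as 16/24/0/8.

    #include <cstdint>
    #include <vector>

    // Illustrative round trip; mirrors InsertInsn and GetOutputInsn32.
    std::vector<uint8_t> Encode(uint32_t insn) {
      return {static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24),
              static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8)};
    }
    // Encode(0xf000f800u) yields {0x00, 0xf0, 0x00, 0xf8}: halfword 0xf000
    // little-endian, then halfword 0xf800 little-endian.
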
 
 const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -164,7 +287,7 @@
 void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
                                                       uint32_t element_offset) {
   dex_cache_arrays_begin_ = dex_cache_arrays_begin;
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
       LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
   };
@@ -175,7 +298,7 @@
 void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
   constexpr uint32_t kStringIndex = 1u;
   string_index_to_offset_map_.Put(kStringIndex, string_offset);
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
       LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
   };
@@ -214,7 +337,7 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallSelf) {
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -227,11 +350,11 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallOther) {
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
-  LinkerPatch method2_patches[] = {
+  const LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
   AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
@@ -254,7 +377,7 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -274,7 +397,7 @@
   constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
   };
 
@@ -303,7 +426,7 @@
   constexpr uint32_t bl_offset_in_method1 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method1_code(method1_raw_code);
   ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
   };
 
@@ -325,7 +448,7 @@
   constexpr uint32_t bl_offset_in_method3 = 2u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
   };
 
@@ -347,7 +470,7 @@
   constexpr uint32_t bl_offset_in_method1 = 2u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method1_code(method1_raw_code);
   ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
   };
 
@@ -382,7 +505,7 @@
   constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
   };
 
@@ -445,5 +568,535 @@
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
+void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  DCHECK_ALIGNED(offset, 4u);
+  DCHECK_LT(offset, 4 * KB);
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    for (uint32_t holder_reg : valid_regs) {
+      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+      const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+      ASSERT_EQ(kMethodCodeSize, raw_code.size());
+      ArrayRef<const uint8_t> code(raw_code);
+      uint32_t encoded_data =
+          Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+      const LinkerPatch patches[] = {
+          LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+      };
+      ++method_idx;
+      AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+    }
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    for (uint32_t holder_reg : valid_regs) {
+      ++method_idx;
+      uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+      const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+      ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+      ASSERT_TRUE(
+          CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+      std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+      ASSERT_GT(output_.size(), thunk_offset);
+      ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+      ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                             expected_thunk.size());
+      if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+        DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+        ASSERT_TRUE(false);
+      }
+
+      size_t gray_check_offset = thunk_offset;
+      if (holder_reg == base_reg) {
+        // Verify that the null-check uses the correct register, i.e. holder_reg.
+        if (holder_reg < 8) {
+          ASSERT_GE(output_.size() - gray_check_offset, 2u);
+          ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+          gray_check_offset += 2u;
+        } else {
+          ASSERT_GE(output_.size() - gray_check_offset, 6u);
+          ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+          ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+          gray_check_offset += 6u;
+        }
+      }
+      // Verify that the lock word for gray bit check is loaded from the holder address.
+      ASSERT_GE(output_.size() - gray_check_offset,
+                4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+      const uint32_t load_lock_word =
+          kLdrWInsn |
+          (holder_reg << 16) |
+          (/* IP */ 12 << 12) |
+          mirror::Object::MonitorOffset().Uint32Value();
+      ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+      // Verify the gray bit check.
+      DCHECK_GE(LockWord::kReadBarrierStateShift, 8u);  // ROR modified immediate.
+      uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+      const uint32_t tst_gray_bit_without_offset =
+          0xf0100f00 | (/* IP */ 12 << 16)
+                     | (((ror_shift >> 4) & 1) << 26)   // i
+                     | (((ror_shift >> 1) & 7) << 12)   // imm3
+                     | ((ror_shift & 1) << 7);          // imm8, ROR('1':imm8<7:0>, ror_shift).
+      EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+      EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u);  // BNE
+      // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+      const uint32_t fake_dependency =
+          0xeb000010 |              // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+          (/* IP */ 12) |           // Rm = IP
+          (base_reg << 16) |        // Rn = base_reg
+          (base_reg << 8);          // Rd = base_reg
+      EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+      // Do not check the rest of the implementation.
+
+      // The next thunk follows on the next aligned offset.
+      thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+    }
+  }
+}
+
+#define TEST_BAKER_FIELD(offset, ref_reg)     \
+  TEST_F(Thumb2RelativePatcherTest,           \
+    BakerOffset##offset##_##ref_reg) {        \
+    TestBakerField(offset, ref_reg);          \
+  }
+
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7)
+TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11)
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
+  // One thunk in the middle with maximum distance branches to it from both sides.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 6u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  // Enforce thunk reservation with a tiny method.
+  AddCompiledMethod(MethodRef(3u), kNopCode);
+
+  constexpr uint32_t kLiteralOffset2 = 4;
+  static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment),
+                "PC for BNE must be aligned.");
+
+  // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch
+  // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract:
+  //   - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+  //   - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+  //   - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+  size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+  size_t filler2_size =
+      1 * MB - (kLiteralOffset2 + kPcAdjustment)
+             - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+  ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+  AddCompiledMethod(MethodRef(4u), filler2_code);
+
+  const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code2(raw_code2);
+  const LinkerPatch patches2[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+  Link();
+
+  uint32_t first_method_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(5u);
+  EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+  const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+  const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000;
+  const std::vector<uint8_t> expected_code1 =
+      RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+  const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) {
+  // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction
+  // earlier, so the thunk is emitted before the filler.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 4u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  Link();
+
+  const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment));
+  const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
+  // Based on BakerOffsetThunkInTheMiddle, but the BNE in the last method is preceded
+  // by a NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 6u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  // Enforce thunk reservation with a tiny method.
+  AddCompiledMethod(MethodRef(3u), kNopCode);
+
+  constexpr uint32_t kReachableFromOffset2 = 4;
+  constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2;
+  static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment),
+                "PC for BNE must be aligned.");
+
+  // If not for the extra NOP, this would allow reaching the thunk from the BNE
+  // of a method 1MiB away. The backward branch reaches the full 1MiB but we need to take
+  // the PC adjustment into account. Things to subtract:
+  //   - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+  //   - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+  //   - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+  size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+  size_t filler2_size =
+      1 * MB - (kReachableFromOffset2 + kPcAdjustment)
+             - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+  ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+  AddCompiledMethod(MethodRef(4u), filler2_code);
+
+  // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle.
+  const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code2(raw_code2);
+  const LinkerPatch patches2[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+  Link();
+
+  uint32_t first_method_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(5u);
+  EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+  const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+  const uint32_t bne_last =
+      BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment));
+  const std::vector<uint8_t> expected_code1 =
+      RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+  const std::vector<uint8_t> expected_code2 =
+      RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerArray) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  auto ldr = [](uint32_t base_reg) {
+    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+    uint32_t ref_reg = (base_reg == 2u) ? 3u : 2u;
+    return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12);
+  };
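+  // A reading of the lambda above, assuming the standard T32 LDR (register) field layout:
+  // Rm (index) occupies bits 3:0, Rn (base) bits 19:16 and Rt (reference) bits 15:12;
+  // the LSL #2 shift amount is pre-encoded in kLdrRegLsl2.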
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the lock word for the gray bit check is loaded from the correct address,
+    // below base_reg which points to the array data.
+    ASSERT_GE(output_.size() - thunk_offset,
+              4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+    int32_t data_offset =
+        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+    int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+    ASSERT_LT(offset, 0);
+    ASSERT_GT(offset, -256);
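+    // The expected instruction below assumes the T32 LDR (immediate, negative offset) form:
+    // the offset magnitude goes into imm8 (bits 7:0), the base register into Rn (bits 19:16)
+    // and the destination into Rt (bits 15:12), hence the -256 < offset < 0 range asserted above.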
+    const uint32_t load_lock_word =
+        kLdrNegativeOffset |
+        (-offset & 0xffu) |
+        (base_reg << 16) |
+        (/* IP */ 12 << 12);
+    EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset));
+    // Verify the gray bit check.
+    DCHECK_GE(LockWord::kReadBarrierStateShift, 8u);  // ROR modified immediate.
+    uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
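+    // A sketch of the derivation, assuming the T32 modified immediate rules: the 8-bit pattern
+    // '1':imm8<6:0> is rotated right by the 5-bit amount i:imm3:imm8<7>. With imm8<6:0> == 0 the
+    // pattern is 0x80, and ROR(0x80, ror_shift) moves its single bit from position 7 to position
+    // 7 - ror_shift + 32 == LockWord::kReadBarrierStateShift, i.e. exactly the gray bit.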
+    const uint32_t tst_gray_bit_without_offset =
+        0xf0100f00 | (/* IP */ 12 << 16)
+                   | (((ror_shift >> 4) & 1) << 26)   // i
+                   | (((ror_shift >> 1) & 7) << 12)   // imm3
+                   | ((ror_shift & 1) << 7);          // imm8<7>, ROR('1':imm8<6:0>, i:imm3:imm8<7>).
+    EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u));
+    EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u);  // BNE
+    // Verify the fake dependency.
+    const uint32_t fake_dependency =
+        0xeb000010 |              // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+        (/* IP */ 12) |           // Rm = IP
+        (base_reg << 16) |        // Rn = base_reg
+        (base_reg << 8);          // Rd = base_reg
+    EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u));
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 4u;
+  uint32_t method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+    const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
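+    // Low registers use the 16-bit CBZ (encoding T1), which can only name R0-R7; for R8 and
+    // above the thunk is expected to fall back to a 32-bit CMP (immediate) followed by a 16-bit
+    // BEQ, which is what the two branches below check (the offset/immediate bits are masked out).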
+    if (root_reg < 8) {
+      ASSERT_GE(output_.size() - thunk_offset, 2u);
+      ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+    } else {
+      ASSERT_GE(output_.size() - thunk_offset, 6u);
+      ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+      ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+    }
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
+  // Test 1MiB of patches to the same thunk to stress-test different large offsets.
+  // (The low bits are not that important but the location of the high bits is easy to get wrong.)
+  std::vector<uint8_t> code;
+  code.reserve(1 * MB);
+  const size_t num_patches = 1 * MB / 8u;
+  std::vector<LinkerPatch> patches;
+  patches.reserve(num_patches);
+  const uint32_t ldr =
+      kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
+  uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0);
+  for (size_t i = 0; i != num_patches; ++i) {
+    PushBackInsn(&code, ldr);
+    PushBackInsn(&code, kBneWPlus0);
+    patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+  }
+  ASSERT_EQ(1 * MB, code.size());
+  ASSERT_EQ(num_patches, patches.size());
+  AddCompiledMethod(MethodRef(1u),
+                    ArrayRef<const uint8_t>(code),
+                    ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  // The thunk is right after the method code.
+  DCHECK_ALIGNED(1 * MB, kArmAlignment);
+  std::vector<uint8_t> expected_code;
+  for (size_t i = 0; i != num_patches; ++i) {
+    PushBackInsn(&expected_code, ldr);
+    PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB));
+  }
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
+  // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+  // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+  // hold when we're reserving thunks of different sizes. This test exposes the situation
+  // by using Baker thunks and a method call thunk.
+
+  // Add a method call patch that can reach up to method 1 offset + 16MiB.
+  uint32_t method_idx = 0u;
+  constexpr size_t kMethodCallLiteralOffset = 2u;
+  constexpr uint32_t kMissingMethodIdx = 2u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+  const LinkerPatch method1_patches[] = {
+      LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
+  };
+  ArrayRef<const uint8_t> code1(raw_code1);
+  ++method_idx;
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+  // Skip kMissingMethodIdx.
+  ++method_idx;
+  ASSERT_EQ(kMissingMethodIdx, method_idx);
+  // Add a method sized so that the code for the next method starts 1MiB
+  // after the code for method 1.
+  size_t filler_size =
+      1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
+  ++method_idx;
+  AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+  // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
+  // before the currently scheduled MaxNextOffset() for the method call thunk.
+  for (uint32_t i = 0; i != 14; ++i) {
+    filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+    filler_code = GenNops(filler_size / 2u);
+    ++method_idx;
+    AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+  }
+
+  // Add 2 Baker GC root patches to the last method: one that would allow the thunk at
+  // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and a second
+  // one that needs the thunk another kArmAlignment after that. Since the GC root thunk
+  // is larger than the space required by the method call thunk plus kArmAlignment,
+  // this pushes the first GC root thunk's pending MaxNextOffset() below the method call
+  // thunk's pending MaxNextOffset(), which then needs to be adjusted.
+  ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
+            CompileBakerGcRootThunk(/* root_reg */ 0).size());
+  static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
+  constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
+  constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
+  // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
+  const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
+  const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
+  const std::vector<uint8_t> last_method_raw_code = RawCode({
+      kNopInsn,                                 // Padding before first GC root read barrier.
+      ldr1, kBneWPlus0,                         // First GC root LDR with read barrier.
+      ldr2, kBneWPlus0,                         // Second GC root LDR with read barrier.
+  });
+  uint32_t encoded_data1 =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+  uint32_t encoded_data2 =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+  const LinkerPatch last_method_patches[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
+      LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
+  };
+  ++method_idx;
+  AddCompiledMethod(MethodRef(method_idx),
+                    ArrayRef<const uint8_t>(last_method_raw_code),
+                    ArrayRef<const LinkerPatch>(last_method_patches));
+
+  // The main purpose of the test is to check that Link() does not cause a crash.
+  Link();
+
+  ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u));
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 551c73b..2b06e3f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -29,6 +29,7 @@
 #include "mirror/array-inl.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
+#include "read_barrier.h"
 #include "utils/arm64/assembler_arm64.h"
 
 namespace art {
@@ -304,27 +305,42 @@
   DCHECK_LT(literal_offset, code->size());
   uint32_t insn = GetInsn(code, literal_offset);
   DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000);  // CBNZ Xt, +0 (unpatched)
-  ThunkKey key = GetBakerReadBarrierKey(patch);
+  ThunkKey key = GetBakerThunkKey(patch);
   if (kIsDebugBuild) {
+    const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1;
+    BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
     // Check that the next instruction matches the expected LDR.
-    switch (key.GetType()) {
-      case ThunkType::kBakerReadBarrierField: {
+    switch (kind) {
+      case BakerReadBarrierKind::kField: {
         DCHECK_GE(code->size() - literal_offset, 8u);
         uint32_t next_insn = GetInsn(code, literal_offset + 4u);
         // LDR (immediate) with correct base_reg.
         CheckValidReg(next_insn & 0x1fu);  // Check destination register.
-        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
         break;
       }
-      case ThunkType::kBakerReadBarrierRoot: {
+      case BakerReadBarrierKind::kArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+        // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+        // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+        CheckValidReg(next_insn & 0x1fu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
+        CheckValidReg((next_insn >> 16) & 0x1fu);  // Check the index register.
+        break;
+      }
+      case BakerReadBarrierKind::kGcRoot: {
         DCHECK_GE(literal_offset, 4u);
         uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
         // LDR (immediate) with correct root_reg.
-        CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg);
+        const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);
         break;
       }
       default:
-        LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+        LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
         UNREACHABLE();
     }
   }
@@ -336,40 +352,6 @@
   SetInsn(code, literal_offset, insn);
 }
 
-ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey(
-    const LinkerPatch& patch) {
-  DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
-  uint32_t value = patch.GetBakerCustomValue1();
-  BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value);
-  ThunkParams params;
-  switch (type) {
-    case BakerReadBarrierKind::kField:
-      params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.offset_params.base_reg);
-      params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
-      CheckValidReg(params.offset_params.holder_reg);
-      break;
-    case BakerReadBarrierKind::kGcRoot:
-      params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.root_params.root_reg);
-      params.root_params.dummy = 0u;
-      DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type);
-      UNREACHABLE();
-  }
-  constexpr uint8_t kTypeTranslationOffset = 1u;
-  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
-                static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
-                "Thunk type translation check.");
-  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
-                static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
-                "Thunk type translation check.");
-  return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset),
-                  params);
-}
-
 #define __ assembler.GetVIXLAssembler()->
 
 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
@@ -394,33 +376,27 @@
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
   __ Br(lr);          // And return back to the function.
   // Note: The fake dependency is unnecessary for the slow path.
 }
 
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler,
+                                                        uint32_t encoded_data) {
   using namespace vixl::aarch64;  // NOLINT(build/namespaces)
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  arm64::Arm64Assembler assembler(&arena);
-
-  switch (key.GetType()) {
-    case ThunkType::kMethodCall: {
-      // The thunk just uses the entry point in the ArtMethod. This works even for calls
-      // to the generic JNI and interpreter trampolines.
-      Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-          kArm64PointerSize).Int32Value());
-      assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
-      break;
-    }
-    case ThunkType::kBakerReadBarrierField: {
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
       // Check if the holder is gray and, if not, add fake dependency to the base register
       // and return to the LDR instruction to load the reference. Otherwise, use introspection
       // to load the reference and call the entrypoint (in IP1) that performs further checks
       // on the reference and marks it if needed.
-      auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
-      auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      auto holder_reg =
+          Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       // If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,17 +420,43 @@
         // Add null check slow path. The stack map is at the address pointed to by LR.
         __ Bind(&throw_npe);
         int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
-        __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+        __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
         __ Br(ip0);
       }
       break;
     }
-    case ThunkType::kBakerReadBarrierRoot: {
+    case BakerReadBarrierKind::kArray: {
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      vixl::aarch64::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffset(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
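+      // A summary of the dispatch below: load the patched LDR (register) instruction from the
+      // return address, extract its index register field (bit 21 is always set in this encoding,
+      // so the extracted value is the register number plus 32) and insert it at bit 3 of the
+      // entrypoint address, selecting that register's switch case (the Bfi position implies
+      // cases spaced 1 << 3 = 8 bytes apart, an assumption about the entrypoint layout).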
+      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
+      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
+      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 into the entrypoint address to create
+                                            // a switch case target based on the index register.
+      __ Mov(ip0, base_reg);                // Move the base register to ip0.
+      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
       // and it does not have a forwarding address), call the correct introspection entrypoint;
       // otherwise return the reference (or the extracted forwarding address).
       // There is no gray bit check for GC roots.
-      auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg);
+      auto root_reg =
+          Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       vixl::aarch64::Label return_label, not_marked, forwarding_address;
@@ -477,6 +479,30 @@
       __ Br(lr);
       break;
     }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm64::Arm64Assembler assembler(&arena);
+
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall: {
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kArm64PointerSize).Int32Value());
+      assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+      break;
+    }
+    case ThunkType::kBakerReadBarrier: {
+      CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1);
+      break;
+    }
   }
 
   // Ensure we emit the literal pool.
@@ -489,22 +515,20 @@
 
 #undef __
 
-uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
-  switch (type) {
+uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
     case ThunkType::kMethodCall:
       return kMaxMethodCallPositiveDisplacement;
-    case ThunkType::kBakerReadBarrierField:
-    case ThunkType::kBakerReadBarrierRoot:
+    case ThunkType::kBakerReadBarrier:
       return kMaxBcondPositiveDisplacement;
   }
 }
 
-uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
-  switch (type) {
+uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
     case ThunkType::kMethodCall:
       return kMaxMethodCallNegativeDisplacement;
-    case ThunkType::kBakerReadBarrierField:
-    case ThunkType::kBakerReadBarrierRoot:
+    case ThunkType::kBakerReadBarrier:
       return kMaxBcondNegativeDisplacement;
   }
 }
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 7887cea..d1ab410 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -19,19 +19,19 @@
 
 #include "base/array_ref.h"
 #include "base/bit_field.h"
+#include "base/bit_utils.h"
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
+
+namespace arm64 {
+class Arm64Assembler;
+}  // namespace arm64
+
 namespace linker {
 
 class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
  public:
-  enum class BakerReadBarrierKind : uint8_t {
-    kField,   // Field get or array get with constant offset (i.e. constant index).
-    kGcRoot,  // GC root load.
-    kLast
-  };
-
   static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
     CheckValidReg(base_reg);
     CheckValidReg(holder_reg);
@@ -40,6 +40,13 @@
            BakerReadBarrierSecondRegField::Encode(holder_reg);
   }
 
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+  }
+
   static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
     CheckValidReg(root_reg);
     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
@@ -68,14 +75,20 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
-
-  ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
-  uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
-  uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+  uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+  uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
 
  private:
+  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast
+  };
+
   static constexpr size_t kBitsForBakerReadBarrierKind =
       MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
   static constexpr size_t kBitsForRegister = 5u;
@@ -90,6 +103,8 @@
     DCHECK(reg < 30u && reg != 16u && reg != 17u);
   }
 
+  void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
+
   static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
 
   static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b4d35ab..b6549ee 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -18,6 +18,7 @@
 #include "linker/relative_patcher_test.h"
 #include "linker/arm64/relative_patcher_arm64.h"
 #include "lock_word.h"
+#include "mirror/array-inl.h"
 #include "mirror/object.h"
 #include "oat_quick_method_header.h"
 
@@ -46,9 +47,15 @@
   static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
   static constexpr uint32_t kBlMinusMax = 0x96000000u;
 
-  // LDR immediate, unsigned offset.
+  // LDR immediate, 32-bit, unsigned offset.
   static constexpr uint32_t kLdrWInsn = 0xb9400000u;
 
+  // LDR register, 32-bit, LSL #2.
+  static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;
+
+  // LDUR, 32-bit.
+  static constexpr uint32_t kLdurWInsn = 0xb8400000u;
+
   // ADD/ADDS/SUB/SUBS immediate, 64-bit.
   static constexpr uint32_t kAddXInsn = 0x91000000u;
   static constexpr uint32_t kAddsXInsn = 0xb1000000u;
@@ -68,7 +75,7 @@
   static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
 
   // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes.
-  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011;
+  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
 
   void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
     CHECK_LE(pos, code->size());
@@ -160,9 +167,7 @@
   }
 
   std::vector<uint8_t> CompileMethodCallThunk() {
-    ArmBaseRelativePatcher::ThunkKey key(
-        ArmBaseRelativePatcher::ThunkType::kMethodCall,
-        ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }});  // NOLINT(whitespace/braces)
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
     return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
@@ -188,7 +193,7 @@
 
   std::vector<uint8_t> GenNops(size_t num_nops) {
     std::vector<uint8_t> result;
-    result.reserve(num_nops * 4u + 4u);
+    result.reserve(num_nops * 4u);
     for (size_t i = 0; i != num_nops; ++i) {
       PushBackInsn(&result, kNopInsn);
     }
@@ -228,7 +233,7 @@
     } else {
       LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
     }
-    uint32_t adrp = 0x90000000 |              // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+    uint32_t adrp = 0x90000000u |             // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
         ((disp & 0x3000u) << (29 - 12)) |     // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
         ((disp & 0xffffc000) >> (14 - 5)) |   // immhi = (disp >> 14) is at bit 5,
         // We take the sign bit from the disp, limiting disp to +- 2GiB.
@@ -466,17 +471,22 @@
   std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
     const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
-    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
-    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
-    return patcher->CompileThunk(key);
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
   std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
     LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
-    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
-    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
-    return patcher->CompileThunk(key);
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
   uint32_t GetOutputInsn(uint32_t offset) {
@@ -488,7 +498,7 @@
            (static_cast<uint32_t>(output_[offset + 3]) << 24);
   }
 
-  void TestBakerField(uint32_t offset, uint32_t root_reg);
+  void TestBakerField(uint32_t offset, uint32_t ref_reg);
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -885,7 +895,7 @@
 
 TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
 
-void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -899,7 +909,7 @@
   uint32_t method_idx = 0u;
   for (uint32_t base_reg : valid_regs) {
     for (uint32_t holder_reg : valid_regs) {
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
       ASSERT_EQ(kMethodCodeSize, raw_code.size());
       ArrayRef<const uint8_t> code(raw_code);
@@ -922,7 +932,7 @@
       ++method_idx;
       uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
       uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
       ASSERT_EQ(kMethodCodeSize, expected_code.size());
       ASSERT_TRUE(
@@ -942,7 +952,7 @@
       if (holder_reg == base_reg) {
         // Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
         ASSERT_GE(output_.size() - gray_check_offset, 4u);
-        ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+        ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
         gray_check_offset +=4u;
       }
       // Verify that the lock word for gray bit check is loaded from the holder address.
@@ -955,12 +965,12 @@
           /* ip0 */ 16;
       EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
       // Verify the gray bit check.
-      const uint32_t check_gray_bit_witout_offset =
-          0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
-      EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+      const uint32_t check_gray_bit_without_offset =
+          0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+      EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu);
       // Verify the fake dependency.
       const uint32_t fake_dependency =
-          0x8b408000 |              // ADD Xd, Xn, Xm, LSR 32
+          0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
           (/* ip0 */ 16 << 16) |    // Xm = ip0
           (base_reg << 5) |         // Xn = base_reg
           base_reg;                 // Xd = base_reg
@@ -973,19 +983,19 @@
   }
 }
 
-#define TEST_BAKER_FIELD(offset, root_reg)    \
+#define TEST_BAKER_FIELD(offset, ref_reg)     \
   TEST_F(Arm64RelativePatcherTestDefault,     \
-    BakerOffset##offset##_##root_reg) {       \
-    TestBakerField(offset, root_reg);         \
+    BakerOffset##offset##_##ref_reg) {        \
+    TestBakerField(offset, ref_reg);          \
   }
 
-TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
-TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29)
 
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
   // One thunk in the middle with maximum distance branches to it from both sides.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1046,7 +1056,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
   // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
   // earlier, so the thunk is emitted before the filler.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 0;
   const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1076,7 +1086,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
   // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
   // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1132,7 +1142,88 @@
   ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
+TEST_F(Arm64RelativePatcherTestDefault, BakerArray) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
+      20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+      // LR and SP/ZR are reserved.
+  };
+  auto ldr = [](uint32_t base_reg) {
+    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+    uint32_t ref_reg = (base_reg == 2u) ? 3u : 2u;
+    return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg;
+  };
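+  // A reading of the lambda above, assuming the standard A64 LDR (register) field layout:
+  // Rm (index) occupies bits 20:16, Rn (base) bits 9:5 and Rt (reference) bits 4:0;
+  // the LSL #2 option is pre-encoded in kLdrWLsl2Insn.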
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+    uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+    const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the lock word for the gray bit check is loaded from the correct address,
+    // below base_reg which points to the array data.
+    static constexpr size_t kGrayCheckInsns = 5;
+    ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns);
+    int32_t data_offset =
+        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+    int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+    ASSERT_LT(offset, 0);
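+    // The expected instruction below assumes the A64 LDUR encoding: the signed 9-bit offset
+    // (imm9) sits in bits 20:12, the base register in Rn (bits 9:5) and the destination in
+    // Rt (bits 4:0), which is why the negative offset is masked with 0x1ff and shifted by 12.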
+    const uint32_t load_lock_word =
+        kLdurWInsn |
+        ((offset & 0x1ffu) << 12) |
+        (base_reg << 5) |
+        /* ip0 */ 16;
+    EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset));
+    // Verify the gray bit check.
+    const uint32_t check_gray_bit_without_offset =
+        0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+    EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu);
+    // Verify the fake dependency.
+    const uint32_t fake_dependency =
+        0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
+        (/* ip0 */ 16 << 16) |    // Xm = ip0
+        (base_reg << 5) |         // Xn = base_reg
+        base_reg;                 // Xd = base_reg
+    EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u));
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+  }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -1180,7 +1271,7 @@
 
     // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
     ASSERT_GE(output_.size() - thunk_offset, 4u);
-    ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+    ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
     // Do not check the rest of the implementation.
 
     // The next thunk follows on the next aligned offset.
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index ead4124..1578c0c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -519,7 +519,7 @@
   if (insn_set == kArm) insn_set = kThumb2;
   std::string error_msg;
   std::vector<std::string> compiler_options;
-  compiler_options.push_back("--compiler-filter=verify-at-runtime");
+  compiler_options.push_back("--compiler-filter=extract");
   SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg);
 
   jobject class_loader;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index af60def..6b5387a 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1675,7 +1675,7 @@
       if (UNLIKELY(!visitor->StartClass(dex_file, class_def_index))) {
         return false;
       }
-      if (compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+      if (compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
         const uint8_t* class_data = dex_file->GetClassData(class_def);
         if (class_data != nullptr) {  // ie not an empty class, such as a marker interface
@@ -1757,7 +1757,7 @@
 }
 
 size_t OatWriter::InitOatMaps(size_t offset) {
-  if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+  if (!compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) {
     return offset;
   }
   {
@@ -1813,7 +1813,7 @@
 }
 
 size_t OatWriter::InitOatCodeDexFiles(size_t offset) {
-  if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+  if (!compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) {
     return offset;
   }
   InitCodeMethodVisitor code_visitor(this, offset, vdex_quickening_info_offset_);
@@ -1982,7 +1982,7 @@
     return false;
   }
 
-  if (compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+  if (compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) {
     WriteQuickeningInfoMethodVisitor visitor(this, vdex_out, start_offset);
     if (!VisitDexMethods(&visitor)) {
       PLOG(ERROR) << "Failed to write the vdex quickening info. File: " << vdex_out->GetLocation();
@@ -2474,11 +2474,28 @@
                              /* verify */ true,
                              /* verify_checksum */ true,
                              &error_msg);
-  } else {
-    CHECK(oat_dex_file->source_.IsRawFile())
-        << static_cast<size_t>(oat_dex_file->source_.GetType());
+  } else if (oat_dex_file->source_.IsRawFile()) {
     File* raw_file = oat_dex_file->source_.GetRawFile();
     dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
+  } else {
+    // The source data is a vdex file.
+    CHECK(oat_dex_file->source_.IsRawData())
+        << static_cast<size_t>(oat_dex_file->source_.GetType());
+    const uint8_t* raw_dex_file = oat_dex_file->source_.GetRawData();
+    // Note: The raw data has already been checked to contain the header
+    // and all the data that the header specifies as the file size.
+    DCHECK(raw_dex_file != nullptr);
+    DCHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file->GetLocation()));
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    // Since the source may have had its layout changed, or may be quickened, don't verify it.
+    dex_file = DexFile::Open(raw_dex_file,
+                             header->file_size_,
+                             location,
+                             oat_dex_file->dex_file_location_checksum_,
+                             nullptr,
+                             /* verify */ false,
+                             /* verify_checksum */ false,
+                             &error_msg);
   }
   if (dex_file == nullptr) {
     LOG(ERROR) << "Failed to open dex file for layout: " << error_msg;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 476906a..d38d5f8 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1749,6 +1749,7 @@
               phi = NewPhi(new_preheader, instruction, type);
             }
             user->ReplaceInput(phi, index);  // Removes the use node from the list.
+            induction_range_.Replace(user, instruction, phi);  // Update the induction range.
           }
         }
         // Scan all environment uses of an instruction and replace each later use with a phi node.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577..35dccd6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -25,6 +26,7 @@
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -60,10 +62,41 @@
 
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
+// A reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
+// offsets below 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
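+// For illustration only (a sketch, not the exact emitted sequence): a hypothetical field at
+// offset 0x1234 would be loaded roughly as `ADD temp, obj, #0x1000` followed by
+// `LDR Rt, [temp, #0x234]`, keeping the LDR immediate below kReferenceLoadMinFarOffset.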
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
 
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+  DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+  DCHECK_EQ(kBakerCcEntrypointRegister,
+            instruction->GetLocations()->GetTemp(
+                instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+  DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+  __ BindTrackedLabel(bne_label);
+  Label placeholder_label;
+  __ b(&placeholder_label, NE);  // Placeholder, patched at link-time.
+  __ Bind(&placeholder_label);
+}
+
 static constexpr int kRegListThreshold = 4;
 
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -819,7 +852,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -954,6 +987,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     // /* int32_t */ monitor = obj->monitor_
     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
     __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
@@ -1962,6 +2007,7 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -3067,6 +3113,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
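+// A constructor fence publishes the writes to a newly constructed object (notably its
+// final fields) before the object's reference can escape; a StoreStore barrier suffices
+// for this and is typically emitted on ARM as DMB ISHST.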
+void InstructionCodeGeneratorARM::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -5272,7 +5327,18 @@
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
     // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation()) {
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // loads we need a temporary only if the offset is too big.
+      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -5738,11 +5804,35 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
-  // Also need for String compression feature.
-  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
-      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation() &&
+        instruction->GetIndex()->IsConstant()) {
+      // Array loads with constant index are treated as field loads.
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // constant index loads we need a temporary only if the offset is too big.
+      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+      if (offset >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+               !Runtime::Current()->UseJitCompilation() &&
+               !instruction->GetIndex()->IsConstant()) {
+      // We need a non-scratch temporary for the array data pointer.
+      locations->AddTemp(Location::RequiresRegister());
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
+  } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for String compression feature.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5854,8 +5944,20 @@
         Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
-        codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+        DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+        if (index.IsConstant()) {
+          // Array load with a constant index can be treated as a field load.
+          data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                          out_loc,
+                                                          obj,
+                                                          data_offset,
+                                                          locations->GetTemp(0),
+                                                          /* needs_null_check */ false);
+        } else {
+          codegen_->GenerateArrayLoadWithBakerReadBarrier(
+              instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+        }
       } else {
         Register out = out_loc.AsRegister<Register>();
         if (index.IsConstant()) {
@@ -6692,6 +6794,13 @@
       // For non-Baker read barrier we have a temp-clobbering call.
     }
   }
+  if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+    if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+        (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+            !Runtime::Current()->UseJitCompilation())) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6871,6 +6980,9 @@
         // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
        // that the kPrimNot result register is the same as the first argument register.
         locations->SetCustomSlowPathCallerSaves(caller_saves);
+        if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+          locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+        }
       } else {
         // For non-Baker read barrier we have a temp-clobbering call.
       }
@@ -7041,6 +7153,9 @@
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7914,48 +8029,93 @@
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used.
-      //
-      // Note that we do not actually check the value of
-      // `GetIsGcMarking()` to decide whether to mark the loaded GC
-      // root or not.  Instead, we load into `temp` the read barrier
-      // mark entry point corresponding to register `root`. If `temp`
-      // is null, it means that `GetIsGcMarking()` is false, and vice
-      // versa.
-      //
-      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
-      //     // Slow path.
-      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
-      //   }
+      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+          !Runtime::Current()->UseJitCompilation()) {
+        // Note that we do not actually check the value of `GetIsGcMarking()`
+        // to decide whether to mark the loaded GC root or not.  Instead, we
+        // load into `temp` (actually kBakerCcEntrypointRegister) the read
+        // barrier mark introspection entrypoint. If `temp` is null, it means
+        // that `GetIsGcMarking()` is false, and vice versa.
+        //
+        // We use link-time generated thunks for the slow path. That thunk
+        // checks the reference and jumps to the entrypoint if needed.
+        //
+        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+        //     lr = &return_address;
+        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //     if (temp != nullptr) {
+        //        goto gc_root_thunk<root_reg>(lr)
+        //     }
+        //   return_address:
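+        //
+        // Illustrative emitted sequence (matching the code below):
+        //     LDR   r4, [TR, #<pReadBarrierMarkReg12 offset>]
+        //     ADR   lr, return_address
+        //     CMP   r4, #0
+        //     LDR   root_reg, [obj, #offset]
+        //     BNE   <thunk>            // Placeholder, patched at link time.
+        //   return_address: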
 
-      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
-      Location temp = Location::RegisterLocation(LR);
-      SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
-          instruction, root, /* entrypoint */ temp);
-      codegen_->AddSlowPath(slow_path);
+        CheckLastTempIsBakerCcEntrypointRegister(instruction);
+        uint32_t custom_data =
+            linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+        Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+        // entrypoint_reg =
+        //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+        DCHECK_EQ(IP, 12);
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+        __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
+        Label return_address;
+        __ AdrCode(LR, &return_address);
+        __ CmpConstant(kBakerCcEntrypointRegister, 0);
+        static_assert(
+            BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+            "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
+        // Currently the offset is always within range. If that changes,
+        // we shall have to split the load the same way as for fields.
+        DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+        ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+        __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        EmitPlaceholderBne(codegen_, bne_label);
+        __ Bind(&return_address);
+      } else {
+        // Note that we do not actually check the value of
+        // `GetIsGcMarking()` to decide whether to mark the loaded GC
+        // root or not.  Instead, we load into `temp` the read barrier
+        // mark entry point corresponding to register `root`. If `temp`
+        // is null, it means that `GetIsGcMarking()` is false, and vice
+        // versa.
+        //
+        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+        //     // Slow path.
+        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
+        //   }
 
-      // The entrypoint is null when the GC is not marking, this prevents one load compared to
-      // checking GetIsGcMarking.
-      __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
+        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+        Location temp = Location::RegisterLocation(LR);
+        SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+            instruction, root, /* entrypoint */ temp);
+        codegen_->AddSlowPath(slow_path);
+
+        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+        // Loading the entrypoint does not require a load acquire since it is only changed when
+        // threads are suspended or running a checkpoint.
+        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
+        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+        __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        static_assert(
+            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+            "have different sizes.");
+        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                      "have different sizes.");
+
+        // The entrypoint is null when the GC is not marking, this prevents one load compared to
+        // checking GetIsGcMarking.
+        __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+        __ Bind(slow_path->GetExitLabel());
+      }
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
@@ -7973,6 +8133,16 @@
   }
 }
 
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+    if (!Runtime::Current()->UseJitCompilation()) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    }
+  }
+}
+
 void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                              Location ref,
                                                              Register obj,
@@ -7982,6 +8152,69 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
+    //   gray_return_address:
+
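+    // Illustrative emitted sequence for the unpoisoned case (matching the code below):
+    //     LDR   r4, [TR, #<pReadBarrierMarkReg12 offset>]
+    //     ADR   lr, gray_return_address
+    //     CMP   r4, #0
+    //     BNE   <thunk>            // Placeholder, patched at link time.
+    //     LDR   ref_reg, [base, #offset]
+    //   gray_return_address:
+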
+    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    Register base = obj;
+    if (offset >= kReferenceLoadMinFarOffset) {
+      base = temp.AsRegister<Register>();
+      DCHECK_NE(base, kBakerCcEntrypointRegister);
+      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+      __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+      offset &= (kReferenceLoadMinFarOffset - 1u);
+    }
+    CheckLastTempIsBakerCcEntrypointRegister(instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+    Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(IP, 12);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+    __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+    Label return_address;
+    __ AdrCode(LR, &return_address);
+    __ CmpConstant(kBakerCcEntrypointRegister, 0);
+    ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+    EmitPlaceholderBne(this, bne_label);
+    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+                  " 2 32-bit instructions (8B) for heap poisoning.");
+    Register ref_reg = ref.AsRegister<Register>();
+    DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+    __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
   ScaleFactor no_scale_factor = TIMES_1;
@@ -8002,9 +8235,67 @@
   static_assert(
       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  ScaleFactor scale_factor = TIMES_4;
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. The base register always points to the
+    //     // start of the array data (data_reg = obj + data_offset).
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
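+    // Illustrative emitted sequence for the unpoisoned case (matching the code below):
+    //     LDR   r4, [TR, #<pReadBarrierMarkReg12 offset>]
+    //     ADD   data_reg, obj, #data_offset
+    //     ADR   lr, gray_return_address
+    //     CMP   r4, #0
+    //     BNE   <thunk>            // Placeholder, patched at link time.
+    //     LDR   ref_reg, [data_reg, index_reg, LSL #2]
+    //   gray_return_address:
+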
+    DCHECK(index.IsValid());
+    Register index_reg = index.AsRegister<Register>();
+    Register ref_reg = ref.AsRegister<Register>();
+    Register data_reg = temp.AsRegister<Register>();
+    DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+    CheckLastTempIsBakerCcEntrypointRegister(instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+    Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(IP, 12);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+    __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+    __ AddConstant(data_reg, obj, data_offset);
+
+    Label return_address;
+    __ AdrCode(LR, &return_address);
+    __ CmpConstant(kBakerCcEntrypointRegister, 0);
+    ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+    EmitPlaceholderBne(this, bne_label);
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+                  " 2 32-bit instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
       instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
@@ -8016,9 +8307,7 @@
                                                                  Location index,
                                                                  ScaleFactor scale_factor,
                                                                  Location temp,
-                                                                 bool needs_null_check,
-                                                                 bool always_update_field,
-                                                                 Register* temp2) {
+                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -8029,6 +8318,73 @@
   // not.
   //
   // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //   }
+
+  Register temp_reg = temp.AsRegister<Register>();
+
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp2`.
+  Location temp2 = Location::RegisterLocation(LR);
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          temp_reg,
+          /* entrypoint */ temp2);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                Register obj,
+                                                                Location field_offset,
+                                                                Location temp,
+                                                                bool needs_null_check,
+                                                                Register temp2) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
   // instead, we load into `temp3` the read barrier mark entry point
   // corresponding to register `ref`. If `temp3` is null, it means
   // that `GetIsGcMarking()` is false, and vice versa.
@@ -8041,52 +8397,30 @@
   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
+  //       old_ref = ref;
   //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   //     }
-  //   } else {
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
 
   Register temp_reg = temp.AsRegister<Register>();
 
-  // Slow path marking the object `ref` when the GC is marking. The
-  // entrypoint will already be loaded in `temp3`.
+  // Slow path updating the object reference at address `obj +
+  // field_offset` when the GC is marking. The entrypoint will already
+  // be loaded in `temp3`.
   Location temp3 = Location::RegisterLocation(LR);
-  SlowPathCodeARM* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
-    // supports address of the form `obj + field_offset`, where `obj`
-    // is a register and `field_offset` is a register pair (of which
-    // only the lower half is used). Thus `offset` and `scale_factor`
-    // above are expected to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            temp_reg,
-            *temp2,
-            /* entrypoint */ temp3);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        temp_reg,
-        /* entrypoint */ temp3);
-  }
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ ScaleFactor::TIMES_1,
+          needs_null_check,
+          temp_reg,
+          temp2,
+          /* entrypoint */ temp3);
   AddSlowPath(slow_path);
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8098,8 +8432,8 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
-  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -8370,6 +8704,11 @@
   return &patches->back();
 }
 
+Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) {
+  baker_read_barrier_patches_.emplace_back(custom_data);
+  return &baker_read_barrier_patches_.back().label;
+}
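+// The returned label is bound at the BNE placeholder (see EmitPlaceholderBne()); its
+// position and the custom data are later reported to the linker as a
+// LinkerPatch::BakerReadBarrierBranchPatch.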
+
 Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
                                                              dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
@@ -8436,7 +8775,8 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -8470,6 +8810,10 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
+  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(),
+                                                                       info.custom_data));
+  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 86f2f21..b94ee20 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -488,6 +488,11 @@
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
+
+  // Add a new baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+  Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
   Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
                                              dex::StringIndex string_index);
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -503,6 +508,10 @@
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Maybe add the reserved entrypoint register as a temporary for a field load. This temp
+  // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+  void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -526,11 +535,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).  This operation
-  // requires an extra temporary register, which must be provided as a
-  // non-null pointer (`temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
@@ -538,9 +542,27 @@
                                                  Location index,
                                                  ScaleFactor scale_factor,
                                                  Location temp,
-                                                 bool needs_null_check,
-                                                 bool always_update_field = false,
-                                                 Register* temp2 = nullptr);
+                                                 bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is used only as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                Register obj,
+                                                Location field_offset,
+                                                Location temp,
+                                                bool needs_null_check,
+                                                Register temp2);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -616,6 +638,13 @@
                                         Literal*,
                                         TypeReferenceValueComparator>;
 
+  struct BakerReadBarrierPatchInfo {
+    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+    Label label;
+    uint32_t custom_data;
+  };
+
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
@@ -648,6 +677,8 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // Baker read barrier patch info.
+  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4629c54..ed0a64c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -91,6 +91,7 @@
 
 // Flags controlling the use of link-time generated thunks for Baker read barriers.
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
 // Some instructions have special requirements for a temporary, for example
@@ -850,7 +851,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -1001,6 +1002,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     // /* int32_t */ monitor = obj->monitor_
     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
@@ -2759,6 +2772,7 @@
     // Object ArrayGet with Baker's read barrier case.
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
     if (index.IsConstant()) {
       // Array load with a constant index can be treated as a field load.
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2769,12 +2783,12 @@
                                                       obj.W(),
                                                       offset,
                                                       maybe_temp,
-                                                      /* needs_null_check */ true,
+                                                      /* needs_null_check */ false,
                                                       /* use_load_acquire */ false);
     } else {
       Register temp = WRegisterFrom(locations->GetTemp(0));
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     // General case.
@@ -5477,6 +5491,15 @@
   }
 }
 
+void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -5928,9 +5951,9 @@
           !Runtime::Current()->UseJitCompilation()) {
         // Note that we do not actually check the value of `GetIsGcMarking()`
         // to decide whether to mark the loaded GC root or not.  Instead, we
-        // load into `temp` the read barrier mark introspection entrypoint.
-        // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-        // vice versa.
+        // load into `temp` (actually IP1) the read barrier mark introspection
+        // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+        // false, and vice versa.
         //
         // We use link-time generated thunks for the slow path. That thunk
         // checks the reference and jumps to the entrypoint if needed.
@@ -6054,24 +6077,24 @@
       !use_load_acquire &&
       !Runtime::Current()->UseJitCompilation()) {
     // Note that we do not actually check the value of `GetIsGcMarking()`
-    // to decide whether to mark the loaded GC root or not.  Instead, we
-    // load into `temp` the read barrier mark introspection entrypoint.
-    // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-    // vice versa.
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
     //
     // We use link-time generated thunks for the slow path. That thunk checks
     // the holder and jumps to the entrypoint if needed. If the holder is not
     // gray, it creates a fake dependency and returns to the LDR instruction.
     //
     //     temp = Thread::Current()->pReadBarrierMarkIntrospection
-    //     lr = &return_address;
+    //     lr = &gray_return_address;
     //     if (temp != nullptr) {
     //        goto field_thunk<holder_reg, base_reg>(lr)
     //     }
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> root = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6141,16 +6164,74 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. The base register always points to the
+    //     // start of the array data (temp = obj + data_offset).
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
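+    // Illustrative emitted sequence for the unpoisoned case (matching the code below):
+    //     LDR   ip1, [tr, #<pReadBarrierMarkReg16 offset>]
+    //     ADD   temp, obj, #data_offset
+    //     ADR   lr, gray_return_address
+    //     CBNZ  ip1, <thunk>       // Placeholder, patched at link time.
+    //     LDR   ref_reg, [temp, index_reg, LSL #2]
+    //   gray_return_address:
+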
+    DCHECK(index.IsValid());
+    Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    DCHECK(temps.IsAvailable(ip0));
+    DCHECK(temps.IsAvailable(ip1));
+    temps.Exclude(ip0, ip1);
+    uint32_t custom_data =
+        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip0.GetCode(), 16u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+    __ Add(temp.X(), obj.X(), Operand(data_offset));
+    EmissionCheckScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    __ Bind(cbnz_label);
+    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    return;
+  }
+
   // Array cells are never volatile variables, therefore array loads
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
-  static_assert(
-      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                             ref,
                                             obj,
@@ -6170,8 +6251,7 @@
                                                                    size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
-                                                                   bool use_load_acquire,
-                                                                   bool always_update_field) {
+                                                                   bool use_load_acquire) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
   // If we are emitting an array load, we should not be using a
@@ -6208,41 +6288,18 @@
   // entrypoint will already be loaded in `temp2`.
   Register temp2 = lr;
   Location temp2_loc = LocationFrom(temp2);
-  SlowPathCodeARM64* slow_path;
-  if (always_update_field) {
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
-    // only supports address of the form `obj + field_offset`, where
-    // `obj` is a register and `field_offset` is a register. Thus
-    // `offset` and `scale_factor` above are expected to be null in
-    // this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            use_load_acquire,
-            temp,
-            /* entrypoint */ temp2_loc);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        use_load_acquire,
-        temp,
-        /* entrypoint */ temp2_loc);
-  }
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          use_load_acquire,
+          temp,
+          /* entrypoint */ temp2_loc);
   AddSlowPath(slow_path);
 
   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -6254,12 +6311,83 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ Cbnz(temp2, slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
+  // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(
       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   __ Bind(slow_path->GetExitLabel());
 }
 
+void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                  Location ref,
+                                                                  Register obj,
+                                                                  Location field_offset,
+                                                                  Register temp,
+                                                                  bool needs_null_check,
+                                                                  bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If we are emitting an array load, we should not be using a
+  // Load Acquire instruction.  In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       old_ref = ref;
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
+  //     }
+  //   }
+
+  // Slow path updating the object reference at address `obj + field_offset`
+  // when the GC is marking. The entrypoint will already be loaded in `temp2`.
+  Register temp2 = lr;
+  Location temp2_loc = LocationFrom(temp2);
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ 0u /* "times 1" */,
+          needs_null_check,
+          use_load_acquire,
+          temp,
+          /* entrypoint */ temp2_loc);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ Cbnz(temp2, slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
                                                   Location ref,
                                                   Register obj,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 332ab49..f16f625 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,12 +318,13 @@
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
-  vixl::aarch64::MemOperand CreateVecMemRegisters(
+  vixl::aarch64::MemOperand VecAddress(
       HVecMemoryOperation* instruction,
-      Location* reg_loc,
-      bool is_load,
       // This function may acquire a scratch register.
-      vixl::aarch64::UseScratchRegisterScope* temps_scope);
+      vixl::aarch64::UseScratchRegisterScope* temps_scope,
+      size_t size,
+      bool is_string_char_at,
+      /*out*/ vixl::aarch64::Register* scratch);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
@@ -634,9 +635,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  vixl::aarch64::Register obj,
@@ -645,8 +643,27 @@
                                                  size_t scale_factor,
                                                  vixl::aarch64::Register temp,
                                                  bool needs_null_check,
-                                                 bool use_load_acquire,
-                                                 bool always_update_field = false);
+                                                 bool use_load_acquire);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is used only as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                vixl::aarch64::Register obj,
+                                                Location field_offset,
+                                                vixl::aarch64::Register temp,
+                                                bool needs_null_check,
+                                                bool use_load_acquire);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b6678b0..8417f84 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm_vixl.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -24,6 +25,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -77,6 +79,20 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
+// A reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load has to be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
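
A minimal editorial sketch of the split this constant implies (the patch performs it
inline in GenerateFieldLoadWithBakerReadBarrier below; the names here are illustrative):

    #include <cstdint>
    // kReferenceLoadMinFarOffset is a power of two, so the high bits of a far
    // offset become a one-time base-register adjustment (ADD) and the low bits
    // stay in the LDR immediate.
    constexpr uint32_t kMinFarOffset = 4u * 1024u;
    constexpr uint32_t BaseDelta(uint32_t offset) { return offset & ~(kMinFarOffset - 1u); }
    constexpr uint32_t LdrOffset(uint32_t offset) { return offset & (kMinFarOffset - 1u); }
    static_assert(BaseDelta(0x1234u) == 0x1000u && LdrOffset(0x1234u) == 0x234u,
                  "offset 0x1234 splits into ADD #0x1000 and LDR #0x234");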
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -88,6 +104,56 @@
 // Marker that code is yet to be, and must, be implemented.
 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
 
+static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
+                                                         HInstruction* instruction) {
+  DCHECK(temps->IsAvailable(ip));
+  temps->Exclude(ip);
+  DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
+  DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
+            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+  DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
+      instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
+  ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
+  __ bind(patch_label);
+  vixl32::Label placeholder_label;
+  __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
+  __ bind(&placeholder_label);
+}
+
+class EmitAdrCode {
+ public:
+  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
+      : assembler_(assembler), rd_(rd), label_(label) {
+    ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+    adr_location_ = assembler->GetCursorOffset();
+    assembler->adr(EncodingSize(Wide), rd, label);
+  }
+
+  ~EmitAdrCode() {
+    DCHECK(label_->IsBound());
+    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
+    // TODO: Maybe extend VIXL to allow ADR for return address?
+    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
+    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
+    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
+    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
+    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
+    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
+    // Add the Thumb mode bit.
+    raw_adr[2] |= 0x01u;
+  }
+
+ private:
+  ArmVIXLMacroAssembler* const assembler_;
+  vixl32::Register rd_;
+  vixl32::Label* const label_;
+  int32_t adr_location_;
+};
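
For orientation, an editorial condensation of how this helper is used in the fast
paths below (taken from the GC root sequence; the comments are editorial):

    vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
    vixl32::Label return_address;
    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);  // LR = &return_address.
    __ cmp(kBakerCcEntrypointRegister, Operand(0));            // Null entrypoint => GC not marking.
    __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
    EmitPlaceholderBne(codegen_, bne_label);                   // Patched to the thunk at link time.
    __ Bind(&return_address);
    // `adr` is destroyed at the end of the scope, after the bind; its destructor
    // then sets the Thumb mode bit in the recorded return address.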
+
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
 // for each live D registers they treat two corresponding S registers as live ones.
 //
@@ -845,7 +911,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -987,6 +1053,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
 
     // /* int32_t */ monitor = obj->monitor_
@@ -2012,6 +2090,7 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -3103,6 +3182,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -5280,7 +5368,18 @@
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
     // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation()) {
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // loads we need a temporary only if the offset is too big.
+      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -5747,11 +5846,35 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
-  // Also need for String compression feature.
-  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
-      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation() &&
+        instruction->GetIndex()->IsConstant()) {
+      // Array loads with constant index are treated as field loads.
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // constant index loads we need a temporary only if the offset is too big.
+      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+      if (offset >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+               !Runtime::Current()->UseJitCompilation() &&
+               !instruction->GetIndex()->IsConstant()) {
+      // We need a non-scratch temporary for the array data pointer.
+      locations->AddTemp(Location::RequiresRegister());
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
+  } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for the String compression feature.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5862,8 +5985,20 @@
         Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
-        codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+        DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+        if (index.IsConstant()) {
+          // Array load with a constant index can be treated as a field load.
+          data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                          out_loc,
+                                                          obj,
+                                                          data_offset,
+                                                          locations->GetTemp(0),
+                                                          /* needs_null_check */ false);
+        } else {
+          codegen_->GenerateArrayLoadWithBakerReadBarrier(
+              instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+        }
       } else {
         vixl32::Register out = OutputRegister(instruction);
         if (index.IsConstant()) {
@@ -6753,6 +6888,13 @@
       // For non-Baker read barrier we have a temp-clobbering call.
     }
   }
+  if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+    if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+        (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+            !Runtime::Current()->UseJitCompilation())) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6929,6 +7071,9 @@
         // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
         // that the kPrimNot result register is the same as the first argument register.
         locations->SetCustomSlowPathCallerSaves(caller_saves);
+        if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+          locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+        }
       } else {
         // For non-Baker read barrier we have a temp-clobbering call.
       }
@@ -7091,6 +7236,9 @@
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+  }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7989,48 +8137,96 @@
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used.
-      //
-      // Note that we do not actually check the value of
-      // `GetIsGcMarking()` to decide whether to mark the loaded GC
-      // root or not.  Instead, we load into `temp` the read barrier
-      // mark entry point corresponding to register `root`. If `temp`
-      // is null, it means that `GetIsGcMarking()` is false, and vice
-      // versa.
-      //
-      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
-      //     // Slow path.
-      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
-      //   }
+      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+          !Runtime::Current()->UseJitCompilation()) {
+        // Note that we do not actually check the value of `GetIsGcMarking()`
+        // to decide whether to mark the loaded GC root or not.  Instead, we
+        // load into `temp` (actually kBakerCcEntrypointRegister) the read
+        // barrier mark introspection entrypoint. If `temp` is null, it means
+        // that `GetIsGcMarking()` is false, and vice versa.
+        //
+        // We use link-time generated thunks for the slow path. That thunk
+        // checks the reference and jumps to the entrypoint if needed.
+        //
+        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+        //     lr = &return_address;
+        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //     if (temp != nullptr) {
+        //        goto gc_root_thunk<root_reg>(lr)
+        //     }
+        //   return_address:
 
-      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
-      Location temp = LocationFrom(lr);
-      SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
-              instruction, root, /* entrypoint */ temp);
-      codegen_->AddSlowPath(slow_path);
+        UseScratchRegisterScope temps(GetVIXLAssembler());
+        ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+        uint32_t custom_data =
+            linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+        vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+        // entrypoint_reg =
+        //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+        DCHECK_EQ(ip.GetCode(), 12u);
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+        __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
+        vixl::EmissionCheckScope guard(GetVIXLAssembler(),
+                                       4 * vixl32::kMaxInstructionSizeInBytes);
+        vixl32::Label return_address;
+        EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+        __ cmp(kBakerCcEntrypointRegister, Operand(0));
+        static_assert(
+            BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+            "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
+        // Currently the offset is always within range. If that changes,
+        // we shall have to split the load the same way as for fields.
+        DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+        __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+        EmitPlaceholderBne(codegen_, bne_label);
+        __ Bind(&return_address);
+      } else {
+        // Note that we do not actually check the value of
+        // `GetIsGcMarking()` to decide whether to mark the loaded GC
+        // root or not.  Instead, we load into `temp` the read barrier
+        // mark entry point corresponding to register `root`. If `temp`
+        // is null, it means that `GetIsGcMarking()` is false, and vice
+        // versa.
+        //
+        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+        //     // Slow path.
+        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
+        //   }
 
-      // The entrypoint is null when the GC is not marking, this prevents one load compared to
-      // checking GetIsGcMarking.
-      __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
+        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+        Location temp = LocationFrom(lr);
+        SlowPathCodeARMVIXL* slow_path =
+            new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+                instruction, root, /* entrypoint */ temp);
+        codegen_->AddSlowPath(slow_path);
+
+        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+        // Loading the entrypoint does not require a load acquire since it is only changed when
+        // threads are suspended or running a checkpoint.
+        GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
+        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+        GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        static_assert(
+            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+            "have different sizes.");
+        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                      "have different sizes.");
+
+        // The entrypoint is null when the GC is not marking, this prevents one load compared to
+        // checking GetIsGcMarking.
+        __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+        __ Bind(slow_path->GetExitLabel());
+      }
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
@@ -8048,6 +8244,16 @@
   }
 }
 
+void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+    if (!Runtime::Current()->UseJitCompilation()) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    }
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  vixl32::Register obj,
@@ -8057,6 +8263,75 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
+    //   gray_return_address:
+
+    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    vixl32::Register base = obj;
+    if (offset >= kReferenceLoadMinFarOffset) {
+      base = RegisterFrom(temp);
+      DCHECK(!base.Is(kBakerCcEntrypointRegister));
+      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+      offset &= (kReferenceLoadMinFarOffset - 1u);
+    }
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+        base.GetCode(),
+        obj.GetCode());
+    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip.GetCode(), 12u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+    __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(kBakerCcEntrypointRegister, Operand(0));
+    EmitPlaceholderBne(this, bne_label);
+    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+                  "2 32-bit instructions (8B) for heap poisoning.");
+    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
+    // Note: We need a Wide NEG for the unpoisoning.
+    if (kPoisonHeapReferences) {
+      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    }
+    __ Bind(&return_address);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
   ScaleFactor no_scale_factor = TIMES_1;
@@ -8077,9 +8352,73 @@
   static_assert(
       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  ScaleFactor scale_factor = TIMES_4;
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load with the array data pointer in `data_reg`.
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
+    DCHECK(index.IsValid());
+    vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt);  // Raw pointer.
+    DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip.GetCode(), 12u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+    __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+    __ Add(data_reg, obj, Operand(data_offset));
+
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(kBakerCcEntrypointRegister, Operand(0));
+    EmitPlaceholderBne(this, bne_label);
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+                  "2 32-bit instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    // Note: We need a Wide NEG for the unpoisoning.
+    if (kPoisonHeapReferences) {
+      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    }
+    __ Bind(&return_address);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
       instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
@@ -8091,9 +8430,7 @@
                                                                      Location index,
                                                                      ScaleFactor scale_factor,
                                                                      Location temp,
-                                                                     bool needs_null_check,
-                                                                     bool always_update_field,
-                                                                     vixl32::Register* temp2) {
+                                                                     bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -8104,6 +8441,73 @@
   // not.
   //
   // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //   }
+
+  vixl32::Register temp_reg = RegisterFrom(temp);
+
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp2`.
+  Location temp2 = LocationFrom(lr);
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          temp_reg,
+          /* entrypoint */ temp2);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    vixl32::Register obj,
+                                                                    Location field_offset,
+                                                                    Location temp,
+                                                                    bool needs_null_check,
+                                                                    vixl32::Register temp2) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
   // instead, we load into `temp3` the read barrier mark entry point
   // corresponding to register `ref`. If `temp3` is null, it means
   // that `GetIsGcMarking()` is false, and vice versa.
@@ -8113,55 +8517,32 @@
   //     // Slow path.
   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
+  //       old_ref = ref;
   //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   //     }
-  //   } else {
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
 
   vixl32::Register temp_reg = RegisterFrom(temp);
 
-  // Slow path marking the object `ref` when the GC is marking. The
-  // entrypoint will already be loaded in `temp3`.
+  // Slow path updating the object reference at address `obj + field_offset`
+  // when the GC is marking. The entrypoint will already be loaded in `temp3`.
   Location temp3 = LocationFrom(lr);
-  SlowPathCodeARMVIXL* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-    // only supports address of the form `obj + field_offset`, where
-    // `obj` is a register and `field_offset` is a register pair (of
-    // which only the lower half is used). Thus `offset` and
-    // `scale_factor` above are expected to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            temp_reg,
-            *temp2,
-            /* entrypoint */ temp3);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        temp_reg,
-        /* entrypoint */ temp3);
-  }
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ ScaleFactor::TIMES_1,
+          needs_null_check,
+          temp_reg,
+          temp2,
+          /* entrypoint */ temp3);
   AddSlowPath(slow_path);
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8173,8 +8554,8 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
-  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -8488,6 +8869,11 @@
   return &patches->back();
 }
 
+vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+  baker_read_barrier_patches_.emplace_back(custom_data);
+  return &baker_read_barrier_patches_.back().label;
+}
+
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
     const DexFile& dex_file,
     dex::StringIndex string_index) {
@@ -8512,10 +8898,6 @@
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
-  return DeduplicateUint32Literal(address, &uint32_literals_);
-}
-
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
     const DexFile& dex_file,
     dex::StringIndex string_index,
@@ -8569,7 +8951,8 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -8603,6 +8986,10 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
+  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+                                                                       info.custom_data));
+  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 1e9669d..657d3c1 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -572,12 +572,16 @@
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
+
+  // Add a new baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+  vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
   VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
                                                        dex::StringIndex string_index);
   VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
                                                      dex::TypeIndex type_index);
   VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
-  VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
   VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                  dex::StringIndex string_index,
                                                  Handle<mirror::String> handle);
@@ -589,6 +593,10 @@
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Maybe add the reserved entrypoint register as a temporary for field load. This temp
+  // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+  void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -612,11 +620,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).  This operation
-  // requires an extra temporary register, which must be provided as a
-  // non-null pointer (`temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  vixl::aarch32::Register obj,
@@ -624,9 +627,27 @@
                                                  Location index,
                                                  ScaleFactor scale_factor,
                                                  Location temp,
-                                                 bool needs_null_check,
-                                                 bool always_update_field = false,
-                                                 vixl::aarch32::Register* temp2 = nullptr);
+                                                 bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is used only as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                vixl::aarch32::Register obj,
+                                                Location field_offset,
+                                                Location temp,
+                                                bool needs_null_check,
+                                                vixl::aarch32::Register temp2);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -713,6 +734,13 @@
                                         VIXLUInt32Literal*,
                                         TypeReferenceValueComparator>;
 
+  struct BakerReadBarrierPatchInfo {
+    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+    vixl::aarch32::Label label;
+    uint32_t custom_data;
+  };
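
Both ends of each record's life cycle appear in the code_generator_arm_vixl.cc
changes above; condensed here for reference:

    // Emission time: record the custom data and obtain the label to bind.
    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
    EmitPlaceholderBne(this, bne_label);  // Binds the label at the BNE placeholder.
    // EmitLinkerPatches: each record becomes a BakerReadBarrierBranchPatch.
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(
          info.label.GetLocation(), info.custom_data));
    }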
+
   VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
                                               MethodToLiteralMap* map);
@@ -750,6 +778,8 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // Baker read barrier patch info.
+  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index aa030b2..357df97 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -219,15 +219,33 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
+    Location out = locations->Out();
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+    const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+    const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+    InvokeRuntimeCallingConvention calling_convention;
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+    const bool is_load_class_bss_entry =
+        (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    InvokeRuntimeCallingConvention calling_convention;
+    // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+    Register entry_address = kNoRegister;
+    if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+      // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+      // kSaveEverything call.
+      entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp;
+      DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+      if (temp_is_a0) {
+        __ Move(entry_address, temp);
+      }
+    }
+
     dex::TypeIndex type_index = cls_->GetTypeIndex();
     __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
-
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
     mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
@@ -237,25 +255,27 @@
       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
     }
 
+    // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+    if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+      // The class entry address was preserved in `entry_address` thanks to kSaveEverything.
+      __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+    }
+
     // Move the class to the desired location.
-    Location out = locations->Out();
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
       Primitive::Type type = instruction_->GetType();
-      mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+      mips_codegen->MoveLocation(out,
+                                 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                 type);
     }
-
     RestoreLiveRegisters(codegen, locations);
-    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
-    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
-    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
-      DCHECK(out.IsValid());
-      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
-      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
-      // so that we can avoid another calculation here.
-      bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+
+    // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+    if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) {
+      // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+      // the class entry.
       Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
-      DCHECK_NE(out.AsRegister<Register>(), AT);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
       bool reordering = __ SetReorder(false);
@@ -286,40 +306,62 @@
   explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    DCHECK(instruction_->IsLoadString());
+    DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    HLoadString* load = instruction_->AsLoadString();
+    const dex::StringIndex string_index = load->GetStringIndex();
+    Register out = locations->Out().AsRegister<Register>();
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+    const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+    const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+    InvokeRuntimeCallingConvention calling_convention;
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    InvokeRuntimeCallingConvention calling_convention;
-    HLoadString* load = instruction_->AsLoadString();
-    const dex::StringIndex string_index = load->GetStringIndex();
+    // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+    Register entry_address = kNoRegister;
+    if (r2_baker_or_no_read_barriers) {
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+      // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+      // kSaveEverything call.
+      entry_address = temp_is_a0 ? out : temp;
+      DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+      if (temp_is_a0) {
+        __ Move(entry_address, temp);
+      }
+    }
+
     __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
     mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+    // Store the resolved string to the BSS entry.
+    if (r2_baker_or_no_read_barriers) {
+      // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+      __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+    }
+
     Primitive::Type type = instruction_->GetType();
     mips_codegen->MoveLocation(locations->Out(),
-                               calling_convention.GetReturnLocation(type),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                type);
-
     RestoreLiveRegisters(codegen, locations);
 
-    // Store the resolved String to the BSS entry.
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
-    // .bss entry address in the fast path, so that we can avoid another calculation here.
-    bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
-    Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
-    Register out = locations->Out().AsRegister<Register>();
-    DCHECK_NE(out, AT);
-    CodeGeneratorMIPS::PcRelativePatchInfo* info =
-        mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    bool reordering = __ SetReorder(false);
-    mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
-    __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
-    __ SetReorder(reordering);
-
+    // Store the resolved string to the BSS entry.
+    if (!r2_baker_or_no_read_barriers) {
+      // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+      // the string entry.
+      Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+      bool reordering = __ SetReorder(false);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+      __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
+    }
     __ B(GetExitLabel());
   }
 
@@ -1719,15 +1761,14 @@
   DCHECK_EQ(code[literal_offset + 1], 0x12);
   DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
   DCHECK_EQ(code[literal_offset + 3], 0x3C);
-  // lw reg, reg, addr32_low
+  // instr reg, reg, addr32_low
   DCHECK_EQ(code[literal_offset + 4], 0x78);
   DCHECK_EQ(code[literal_offset + 5], 0x56);
-  DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C);
-  addr32 += (addr32 & 0x8000) << 1;  // Account for sign extension in "lw reg, reg, addr32_low".
+  addr32 += (addr32 & 0x8000) << 1;  // Account for sign extension in "instr reg, reg, addr32_low".
   // lui reg, addr32_high
   code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
   code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
-  // lw reg, reg, addr32_low
+  // instr reg, reg, addr32_low
   code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
   code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
 }
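
A worked example of the sign-extension compensation above (editorial, standalone C++):

    #include <cstdint>
    // The low half is sign-extended by "instr reg, reg, addr32_low", so when bit 15
    // of addr32 is set, the high half loaded by LUI must be pre-incremented.
    constexpr uint32_t Compensate(uint32_t addr32) {
      return addr32 + ((addr32 & 0x8000u) << 1);
    }
    // 0x12348888: the low half 0x8888 sign-extends to -0x7778, so LUI must load
    // 0x1235, not 0x1234, because 0x12350000 - 0x7778 == 0x12348888.
    static_assert(Compensate(0x12348888u) == 0x12358888u, "bit 15 set: high half bumped");
    static_assert(Compensate(0x12340888u) == 0x12340888u, "bit 15 clear: unchanged");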
@@ -2436,6 +2477,9 @@
                                                    object_array_get_with_read_barrier
                                                        ? LocationSummary::kCallOnSlowPath
                                                        : LocationSummary::kNoCall);
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(type)) {
@@ -5767,6 +5811,9 @@
               ? LocationSummary::kCallOnSlowPath
               : LocationSummary::kNoCall));
 
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
     InvokeRuntimeCallingConvention calling_convention;
@@ -6445,6 +6492,7 @@
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool baker_read_barrier_slow_path = false;
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
@@ -6452,6 +6500,7 @@
     case TypeCheckKind::kArrayObjectCheck:
       call_kind =
           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      baker_read_barrier_slow_path = kUseBakerReadBarrier;
       break;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
@@ -6461,6 +6510,9 @@
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  if (baker_read_barrier_slow_path) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
@@ -7048,26 +7100,27 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
-        cls,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        calling_convention.GetReturnLocation(Primitive::kPrimNot));
+    Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
-
+  const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kBssEntry:
-      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+      if (isR6) {
         break;
       }
       FALLTHROUGH_INTENDED;
@@ -7078,6 +7131,22 @@
       break;
   }
   locations->SetOut(Location::RequiresRegister());
+  if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+    if (!kUseReadBarrier || kUseBakerReadBarrier) {
+      // Rely on the type resolution or initialization and marking to save everything we need.
+      // Request a temp to hold the BSS entry location for the slow path on R2
+      // (no benefit for R6).
+      if (!isR6) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      RegisterSet caller_saves = RegisterSet::Empty();
+      InvokeRuntimeCallingConvention calling_convention;
+      caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+      locations->SetCustomSlowPathCallerSaves(caller_saves);
+    } else {
+      // For non-Baker read barriers we have a temp-clobbering call.
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7160,10 +7229,22 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      bool reordering = __ SetReorder(false);
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
-      __ SetReorder(reordering);
+      constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+      if (isR6 || non_baker_read_barrier) {
+        bool reordering = __ SetReorder(false);
+        codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+        GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
+        __ SetReorder(reordering);
+      } else {
+        // On R2 save the BSS entry address in a temporary register instead of
+        // recalculating it in the slow path.
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        bool reordering = __ SetReorder(false);
+        codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+        __ Addiu(temp, temp, /* placeholder */ 0x5678);
+        __ SetReorder(reordering);
+        GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option);
+      }
       generate_null_check = true;
       break;
     }
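
The R2-only temp above lets the slow path reuse the already-materialized BSS slot address rather than re-emitting the hi/lo computation. A compact sketch of the idea, with hypothetical helper names standing in for the emitted lui/addiu/lw sequence:

    #include <cstdint>

    // Fast path: materialize the BSS slot address once (the pair emitted via
    // EmitPcRelativeAddressPlaceholderHigh + Addiu above).
    uint32_t* MaterializeBssSlot(uintptr_t page_base, uintptr_t low_bits) {
      return reinterpret_cast<uint32_t*>(page_base + low_bits);
    }

    // The fast path loads through the slot; on a miss the slow path stores the
    // resolved type through the same pointer instead of recomputing the address.
    uint32_t LoadRoot(const uint32_t* slot) { return *slot; }
    void StoreResolved(uint32_t* slot, uint32_t type_root) { *slot = type_root; }
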
@@ -7227,13 +7308,14 @@
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
+  const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
     case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadString::LoadKind::kBssEntry:
-      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+      if (isR6) {
         break;
       }
       FALLTHROUGH_INTENDED;
@@ -7246,9 +7328,25 @@
   }
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+    locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and marking to save everything we need.
+        // Request a temp to hold the BSS entry location for the slow path on R2
+        // (no benefit for R6).
+        if (!isR6) {
+          locations->AddTemp(Location::RequiresRegister());
+        }
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barriers we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -7305,14 +7403,26 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      bool reordering = __ SetReorder(false);
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      GenerateGcRootFieldLoad(load,
-                              out_loc,
-                              out,
-                              /* placeholder */ 0x5678,
-                              kCompilerReadBarrierOption);
-      __ SetReorder(reordering);
+      constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+      if (isR6 || non_baker_read_barrier) {
+        bool reordering = __ SetReorder(false);
+        codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+        GenerateGcRootFieldLoad(load,
+                                out_loc,
+                                out,
+                                /* placeholder */ 0x5678,
+                                kCompilerReadBarrierOption);
+        __ SetReorder(reordering);
+      } else {
+        // On R2 save the BSS entry address in a temporary register instead of
+        // recalculating it in the slow path.
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        bool reordering = __ SetReorder(false);
+        codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+        __ Addiu(temp, temp, /* placeholder */ 0x5678);
+        __ SetReorder(reordering);
+        GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
+      }
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqz(out, slow_path->GetEntryLabel());
@@ -7342,6 +7452,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -7766,6 +7877,15 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
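
The new VisitConstructorFence visitors lower HConstructorFence to a kStoreStore barrier. A rough portable-C++ analogy of the ordering this provides (illustrative only; the generated code emits the barrier instruction directly):

    #include <atomic>

    struct Boxed { int value; };

    std::atomic<Boxed*> g_published{nullptr};

    // The fence orders the initializing stores before the publication store,
    // which is what a StoreStore barrier guarantees on a weak memory model.
    void Publish(int v) {
      Boxed* b = new Boxed{v};                              // initializing stores
      std::atomic_thread_fence(std::memory_order_release);  // ~ kStoreStore
      g_published.store(b, std::memory_order_relaxed);      // publication store
    }
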
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 3875c4b..03939e3 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -229,9 +229,10 @@
   // We switch to the table-based method starting with 7 cases.
   static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 19250c6..a9c4964 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -141,7 +141,8 @@
 
 class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
+  explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction)
+      : SlowPathCodeMIPS64(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -192,7 +193,9 @@
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
       Primitive::Type type = instruction_->GetType();
-      mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+      mips64_codegen->MoveLocation(out,
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                   type);
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -200,10 +203,6 @@
     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
     if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
       DCHECK(out.IsValid());
-      // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
-      // kSaveEverything and use a temporary for the .bss entry address in the fast path,
-      // so that we can avoid another calculation here.
-      DCHECK_NE(out.AsRegister<GpuRegister>(), AT);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
       mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -250,16 +249,13 @@
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     Primitive::Type type = instruction_->GetType();
     mips64_codegen->MoveLocation(locations->Out(),
-                                 calling_convention.GetReturnLocation(type),
+                                 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                  type);
 
     RestoreLiveRegisters(codegen, locations);
 
     // Store the resolved String to the BSS entry.
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
-    // .bss entry address in the fast path, so that we can avoid another calculation here.
     GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-    DCHECK_NE(out, AT);
     CodeGeneratorMIPS64::PcRelativePatchInfo* info =
         mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
     mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -1986,6 +1982,9 @@
                                                    object_array_get_with_read_barrier
                                                        ? LocationSummary::kCallOnSlowPath
                                                        : LocationSummary::kNoCall);
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(type)) {
@@ -3982,6 +3981,9 @@
       object_field_get_with_read_barrier
           ? LocationSummary::kCallOnSlowPath
           : LocationSummary::kNoCall);
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
@@ -4544,6 +4546,7 @@
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool baker_read_barrier_slow_path = false;
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
@@ -4551,6 +4554,7 @@
     case TypeCheckKind::kArrayObjectCheck:
       call_kind =
           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      baker_read_barrier_slow_path = kUseBakerReadBarrier;
       break;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
@@ -4560,6 +4564,9 @@
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  if (baker_read_barrier_slow_path) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
@@ -5077,10 +5084,8 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
-        cls,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        calling_convention.GetReturnLocation(Primitive::kPrimNot));
+    Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -5090,10 +5095,24 @@
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister());
+  if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+    if (!kUseReadBarrier || kUseBakerReadBarrier) {
+      // Rely on the type resolution or initialization and marking to save everything we need.
+      RegisterSet caller_saves = RegisterSet::Empty();
+      InvokeRuntimeCallingConvention calling_convention;
+      caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+      locations->SetCustomSlowPathCallerSaves(caller_saves);
+    } else {
+      // For non-Baker read barriers we have a temp-clobbering call.
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -5224,9 +5243,20 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
-    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+    locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and marking to save everything we need.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+      // For non-Baker read barriers we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -5294,6 +5324,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -5653,6 +5684,15 @@
   }
 }
 
+void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index fd1a174..200e884 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -226,9 +226,10 @@
   // We switch to the table-based method starting with 7 cases.
   static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 93befa4..57f7e6b 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,7 @@
 namespace art {
 namespace arm64 {
 
+using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
@@ -771,20 +772,22 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+// Helper to construct the memory operand for vector memory operations. Returns the operand
+// and, if a temporary register was needed to form it, sets the output parameter scratch to
+// that register so the caller can release it right after using the operand.
+MemOperand InstructionCodeGeneratorARM64::VecAddress(
     HVecMemoryOperation* instruction,
-    Location* reg_loc,
-    bool is_load,
-    UseScratchRegisterScope* temps_scope) {
+    UseScratchRegisterScope* temps_scope,
+    size_t size,
+    bool is_string_char_at,
+    /*out*/ Register* scratch) {
   LocationSummary* locations = instruction->GetLocations();
   Register base = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-
-  Primitive::Type packed_type = instruction->GetPackedType();
-  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
-  size_t shift = Primitive::ComponentSizeShift(packed_type);
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
+  size_t shift = ComponentSizeShiftWidth(size);
 
   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
@@ -793,10 +796,9 @@
     offset += Int64ConstantFrom(index) << shift;
     return HeapOperand(base, offset);
   } else {
-    Register temp = temps_scope->AcquireSameSizeAs(base);
-    __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift));
-
-    return HeapOperand(temp, offset);
+    *scratch = temps_scope->AcquireSameSizeAs(base);
+    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+    return HeapOperand(*scratch, offset);
   }
 }
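
A scalar sketch of the effective-address arithmetic VecAddress performs; the offset constants here are placeholders for the real mirror::String::ValueOffset() and mirror::Array::DataOffset(size) values:

    #include <cstddef>
    #include <cstdint>

    // Placeholder offsets; the real values come from the mirror:: classes.
    constexpr uintptr_t kStringValueOffset = 16;
    constexpr uintptr_t kArrayDataOffset = 12;

    uintptr_t VecElementAddress(uintptr_t base, size_t index, size_t size,
                                bool is_string_char_at) {
      uintptr_t offset = is_string_char_at ? kStringValueOffset : kArrayDataOffset;
      size_t shift = 0;
      while ((size >> shift) > 1u) ++shift;  // ~ ComponentSizeShiftWidth(size)
      return base + offset + (static_cast<uintptr_t>(index) << shift);
    }
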
 
@@ -805,15 +807,43 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->Out());
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        vixl::aarch64::Label uncompressed_load, done;
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        Register length = temps.AcquireW();
+        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
+        __ Tbnz(length.W(), 0, &uncompressed_load);
+        temps.Release(length);  // no longer needed
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
+               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+        __ Uxtl(reg.V8H(), reg.V8B());
+        __ B(&done);
+        if (scratch.IsValid()) {
+          temps.Release(scratch);  // if used, no longer needed
+        }
+        // Load 8 direct uncompressed chars.
+        __ Bind(&uncompressed_load);
+        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimFloat:
@@ -821,7 +851,7 @@
     case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Ldr(reg, mem);
+      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -834,10 +864,11 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->InAt(2));
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
@@ -850,7 +881,7 @@
     case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Str(reg, mem);
+      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
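
A scalar analogue of the compressed/uncompressed split in the VisitVecLoad change above; the generated code does the same for all 8 lanes at once with Ldr and Uxtl:

    #include <cstddef>
    #include <cstdint>

    // Low bit of String::count: 0 = compressed (8-bit data), 1 = uncompressed,
    // matching the static_assert on StringCompressionFlag above.
    void LoadEightChars(uint32_t count_field,
                        const uint8_t* compressed, const uint16_t* uncompressed,
                        uint16_t out[8]) {
      if ((count_field & 1u) == 0u) {
        for (size_t i = 0; i < 8; ++i) out[i] = compressed[i];    // ~ Ldr + Uxtl
      } else {
        for (size_t i = 0; i < 8; ++i) out[i] = uncompressed[i];  // ~ plain Ldr
      }
    }
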
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 013b092..5bb19c1 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -201,6 +201,7 @@
 
 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -766,16 +767,10 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -783,22 +778,53 @@
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
 }
 
 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 8 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -825,9 +851,10 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
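
A scalar model of the pxor/punpcklbw idiom used in the compressed path above: interleaving the loaded bytes with zeros zero-extends them to 16-bit chars:

    #include <cstdint>

    // Interleaving the 8 loaded bytes with a zeroed register produces 8
    // zero-extended 16-bit chars, one lane at a time in this scalar model.
    void PunpcklbwModel(const uint8_t in[8], uint16_t out[8]) {
      for (int i = 0; i < 8; ++i) {
        out[i] = static_cast<uint16_t>(in[i]);  // low byte = data, high byte = 0
      }
    }
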
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 66f19a4..6d4aae8 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -194,6 +194,7 @@
 
 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -755,16 +756,10 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -772,22 +767,53 @@
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
 }
 
 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 8 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -814,9 +840,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 08a752f..1e867dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2057,6 +2057,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff6e099..f413739 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2165,6 +2165,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 34b52a8..aea901d 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -338,14 +338,21 @@
 
   // Ensure the inputs of `instruction` are defined in a block of the graph.
   for (HInstruction* input : instruction->GetInputs()) {
-    const HInstructionList& list = input->IsPhi()
-        ? input->GetBlock()->GetPhis()
-        : input->GetBlock()->GetInstructions();
-    if (!list.Contains(input)) {
-      AddError(StringPrintf("Input %d of instruction %d is not defined "
-                            "in a basic block of the control-flow graph.",
+    if (input->GetBlock() == nullptr) {
+      AddError(StringPrintf("Input %d of instruction %d is not in any "
+                            "basic block of the control-flow graph.",
                             input->GetId(),
                             instruction->GetId()));
+    } else {
+      const HInstructionList& list = input->IsPhi()
+          ? input->GetBlock()->GetPhis()
+          : input->GetBlock()->GetInstructions();
+      if (!list.Contains(input)) {
+        AddError(StringPrintf("Input %d of instruction %d is not defined "
+                              "in a basic block of the control-flow graph.",
+                              input->GetId(),
+                              instruction->GetId()));
+      }
     }
   }
 
@@ -497,8 +504,7 @@
                             "has a null pointer as last input.",
                             invoke->DebugName(),
                             invoke->GetId()));
-    }
-    if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+    } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a last instruction (%s:%d) which is neither a clinit check "
                             "nor a load class instruction.",
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 1f8a58c..3b681c1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -140,6 +140,14 @@
   DCHECK_NE(total_number_of_instructions_, 0u);
   DCHECK_NE(inlining_budget_, 0u);
 
+  // If we're compiling with a core image (which is only used for
+  // test purposes), honor inlining directives in method names:
+  // - if a method's name contains the substring "$inline$", ensure
+  //   that this method is actually inlined;
+  // - if a method's name contains the substring "$noinline$", do not
+  //   inline that method.
+  const bool honor_inlining_directives = IsCompilingWithCoreImage();
+
   // Keep a copy of all blocks when starting the visit.
   ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
   DCHECK(!blocks.empty());
@@ -152,7 +160,7 @@
       HInvoke* call = instruction->AsInvoke();
       // As long as the call is not intrinsified, it is worth trying to inline.
       if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
-        if (kIsDebugBuild && IsCompilingWithCoreImage()) {
+        if (honor_inlining_directives) {
           // Debugging case: directives in method names control or assert on inlining.
           std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
               call->GetDexMethodIndex(), /* with_signature */ false);
@@ -809,7 +817,17 @@
   }
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  bool is_referrer = (klass.Get() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  bool is_referrer;
+  ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod();
+  if (outermost_art_method == nullptr) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    // We are in AOT mode and we don't have an ART method to determine
+    // if the inlined method belongs to the referrer. Assume it doesn't.
+    is_referrer = false;
+  } else {
+    is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass();
+  }
+
   // Note that we will just compare the classes, so we don't need Java semantics access checks.
   // Note that the type index and the dex file are relative to the method this type guard is
   // inlined into.
@@ -1462,8 +1480,13 @@
         }
       }
       if (needs_constructor_barrier) {
-        HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
-        invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+        // See CompilerDriver::RequiresConstructorBarrier for more details.
+        DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
+
+        HConstructorFence* constructor_fence =
+            new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena());
+        invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence,
+                                                                invoke_instruction);
       }
       *return_replacement = nullptr;
       break;
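
A sketch of the name-based directive matching that the inliner comment above describes; the real code searches the PrettyMethod() output for these substrings:

    #include <string>

    enum class Directive { kNone, kMustInline, kMustNotInline };

    // The directives live in the method name itself, so plain substring
    // search is enough; "$noinline$" is checked first for clarity.
    Directive GetInliningDirective(const std::string& callee_name) {
      if (callee_name.find("$noinline$") != std::string::npos) {
        return Directive::kMustNotInline;
      }
      if (callee_name.find("$inline$") != std::string::npos) {
        return Directive::kMustInline;
      }
      return Directive::kNone;
    }
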
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978c6a2..8b79da8 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -451,10 +451,13 @@
                                                               referrer_method_id.class_idx_,
                                                               parameter_index++,
                                                               Primitive::kPrimNot,
-                                                              true);
+                                                              /* is_this */ true);
     AppendInstruction(parameter);
     UpdateLocal(locals_index++, parameter);
     number_of_parameters--;
+    current_this_parameter_ = parameter;
+  } else {
+    DCHECK(current_this_parameter_ == nullptr);
   }
 
   const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
@@ -465,7 +468,7 @@
         arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
         parameter_index++,
         Primitive::GetType(shorty[shorty_pos]),
-        false);
+        /* is_this */ false);
     ++shorty_pos;
     AppendInstruction(parameter);
     // Store the parameter value in the local that the dex code will use
@@ -588,6 +591,8 @@
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
+// Does the method being compiled need any constructor barriers inserted?
+// (Always 'false' for methods that aren't <init>.)
 static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
   // Can be null in unit tests only.
   if (UNLIKELY(cu == nullptr)) {
@@ -596,6 +601,11 @@
 
   Thread* self = Thread::Current();
   return cu->IsConstructor()
+      && !cu->IsStatic()
+      // RequiresConstructorBarrier must only be queried for <init> methods;
+      // it's effectively "false" for every other method.
+      //
+      // See CompilerDriver::RequiresConstructorBarrier for more details.
       && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
 }
 
@@ -639,13 +649,24 @@
                                       Primitive::Type type,
                                       uint32_t dex_pc) {
   if (type == Primitive::kPrimVoid) {
+    // Only <init> (whose return type is void) can have a constructor fence.
     // This may insert additional redundant constructor fences from the super constructors.
     // TODO: remove redundant constructor fences (b/36656456).
     if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
-      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+      // Compiling instance constructor.
+      if (kIsDebugBuild) {
+        std::string method_name = graph_->GetMethodName();
+        CHECK_EQ(std::string("<init>"), method_name);
+      }
+
+      HInstruction* fence_target = current_this_parameter_;
+      DCHECK(fence_target != nullptr);
+
+      AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
+    DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
     HInstruction* value = LoadLocal(instruction.VRegA(), type);
     AppendInstruction(new (arena_) HReturn(value, dex_pc));
   }
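
Condensed shape of the strengthened constructor-barrier predicate (a sketch; has_final_fields stands in for the driver->RequiresConstructorBarrier() query):

    // Only instance constructors of classes with final instance fields
    // can require a constructor barrier.
    bool NeedsConstructorBarrier(bool is_constructor, bool is_static,
                                 bool has_final_fields) {
      return is_constructor && !is_static && has_final_fields;
    }
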
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 7fdc188..2fb5c7b 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -62,6 +62,7 @@
         current_block_(nullptr),
         current_locals_(nullptr),
         latest_result_(nullptr),
+        current_this_parameter_(nullptr),
         compiler_driver_(driver),
         code_generator_(code_generator),
         dex_compilation_unit_(dex_compilation_unit),
@@ -325,6 +326,11 @@
   HBasicBlock* current_block_;
   ArenaVector<HInstruction*>* current_locals_;
   HInstruction* latest_result_;
+  // Current "this" parameter.
+  // Valid only after InitializeParameters() finishes.
+  // * Null for static methods.
+  // * Non-null for instance methods.
+  HParameterValue* current_this_parameter_;
 
   CompilerDriver* const compiler_driver_;
 
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 7d1f146..c39e5f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -247,6 +247,7 @@
       access->GetType() == Primitive::kPrimNot) {
     // For object arrays, the read barrier instrumentation requires
     // the original array pointer.
+    // TODO: This can be relaxed for Baker CC.
     return false;
   }
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 750f9cc..69cf9a1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1010,17 +1010,14 @@
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          ScaleFactor::TIMES_1,
+          /* field_offset */ offset_loc,
           tmp_ptr_loc,
           /* needs_null_check */ false,
-          /* always_update_field */ true,
-          &tmp);
+          tmp);
     }
   }
 
@@ -1648,6 +1645,8 @@
     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
     // temporary register from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_);
+    arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4d36015..65a8222 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1154,17 +1154,14 @@
       Register temp = WRegisterFrom(locations->GetTemp(0));
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          /* scale_factor */ 0u,
+          /* field_offset */ offset_loc,
           temp,
           /* needs_null_check */ false,
-          /* use_load_acquire */ false,
-          /* always_update_field */ true);
+          /* use_load_acquire */ false);
     }
   }
 
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fd8a37a..356d5bc 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1347,17 +1347,14 @@
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          ScaleFactor::TIMES_1,
+          /* field_offset */ offset_loc,
           tmp_ptr_loc,
           /* needs_null_check */ false,
-          /* always_update_field */ true,
-          &tmp);
+          tmp);
     }
   }
 
@@ -2026,6 +2023,8 @@
     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
     // temporary register from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+    arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 41df56b..abf5b12 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -32,7 +32,7 @@
 namespace mips {
 
 IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen)
-  : arena_(codegen->GetGraph()->GetArena()) {
+  : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
 }
 
 MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() {
@@ -1525,6 +1525,9 @@
                                                                 ? LocationSummary::kCallOnSlowPath
                                                                 : LocationSummary::kNoCall),
                                                            kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
@@ -3133,6 +3136,89 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// java.lang.Integer java.lang.Integer.valueOf(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  InstructionCodeGeneratorMIPS* icodegen =
+      down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor());
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadConst32(out, address);
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    MipsLabel allocate, done;
+    int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+    // Is (info.low <= in) && (in <= info.high)?
+    __ Addiu32(out, in, -info.low);
+    // As an unsigned quantity, is out < (info.high - info.low + 1)?
+    if (IsInt<16>(count)) {
+      __ Sltiu(AT, out, count);
+    } else {
+      __ LoadConst32(AT, count);
+      __ Sltu(AT, out, AT);
+    }
+    // Branch if out >= (info.high - info.low + 1).
+    // This means that "in" is outside of the range [info.low, info.high].
+    __ Beqz(AT, &allocate);
+
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadConst32(TMP, data_offset + address);
+    __ ShiftAndAdd(out, out, TMP, TIMES_4);
+    __ Lw(out, out, 0);
+    __ MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
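
The Addiu32/Sltiu pair above implements the classic trick of collapsing a two-sided signed range test into one unsigned comparison. A self-contained check of the same logic, using the usual -128..127 IntegerCache bounds for illustration:

    #include <cassert>
    #include <cstdint>

    // (low <= in && in <= high) collapses into a single unsigned compare,
    // exactly as the generated Addiu32/Sltiu sequence does.
    bool InIntegerCacheRange(int32_t in, int32_t low, int32_t high) {
      uint32_t count = static_cast<uint32_t>(high) - static_cast<uint32_t>(low) + 1u;
      return static_cast<uint32_t>(in) - static_cast<uint32_t>(low) < count;
    }

    int main() {
      assert(InIntegerCacheRange(0, -128, 127));
      assert(InIntegerCacheRange(-128, -128, 127));
      assert(!InIntegerCacheRange(128, -128, 127));
      return 0;
    }
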
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
@@ -3162,8 +3248,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
-
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index e134cb8..eaadad2 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -49,6 +49,7 @@
   bool TryDispatch(HInvoke* invoke);
 
  private:
+  CodeGeneratorMIPS* codegen_;
   ArenaAllocator* arena_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b57b41f..9dce59b 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -32,7 +32,7 @@
 namespace mips64 {
 
 IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen)
-  : arena_(codegen->GetGraph()->GetArena()) {
+  : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
 }
 
 Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() {
@@ -1168,6 +1168,9 @@
                                                                 ? LocationSummary::kCallOnSlowPath
                                                                 : LocationSummary::kNoCall),
                                                            kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
@@ -2564,6 +2567,84 @@
   GenFPToFPCall(invoke, codegen_, kQuickTanh);
 }
 
+// java.lang.Integer java.lang.Integer.valueOf(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  InstructionCodeGeneratorMIPS64* icodegen =
+      down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadConst64(out, address);
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+    Mips64Label allocate, done;
+    int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+    // Is (info.low <= in) && (in <= info.high)?
+    __ Addiu32(out, in, -info.low);
+    // As unsigned quantities, is out < (info.high - info.low + 1)?
+    __ LoadConst32(AT, count);
+    // Branch if out >= (info.high - info.low + 1).
+    // This means that "in" is outside of the range [info.low, info.high].
+    __ Bgeuc(out, AT, &allocate);
+
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadConst64(TMP, data_offset + address);
+    __ Dlsa(out, out, TMP, TIMES_4);
+    __ Lwu(out, out, 0);
+    __ MaybeUnpoisonHeapReference(out);
+    __ Bc(&done);
+
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 
@@ -2583,8 +2664,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
-
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
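
Both valueOf implementations embed boot-image pointers as 32-bit immediates,
which is only sound because dchecked_integral_cast verifies in debug builds
that the pointer round-trips through uint32_t. A rough standalone sketch of
that check (CheckedNarrow is a hypothetical stand-in; the real helper DCHECKs
rather than asserts):

    #include <cassert>
    #include <cstdint>

    // Narrow `value` to Dest, asserting that no bits are lost.
    template <typename Dest, typename Source>
    Dest CheckedNarrow(Source value) {
      Dest result = static_cast<Dest>(value);
      assert(static_cast<Source>(result) == value && "narrowing lost information");
      return result;
    }

    int main() {
      uintptr_t boot_image_ptr = 0x70000000u;  // fits in 32 bits: OK
      uint32_t addr = CheckedNarrow<uint32_t>(boot_image_ptr);
      (void)addr;
      // CheckedNarrow<uint32_t>(uintptr_t{1} << 40) would assert on a 64-bit host.
      return 0;
    }
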
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 5b95c26..179627a 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -49,6 +49,7 @@
   bool TryDispatch(HInvoke* invoke);
 
  private:
+  CodeGeneratorMIPS64* codegen_;
   ArenaAllocator* arena_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 48699b3..8d8cc93 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -566,14 +566,22 @@
       store->GetBlock()->RemoveInstruction(store);
     }
 
-    // Eliminate allocations that are not used.
+    // Eliminate singleton-classified instructions:
+    //   * Constructor fences (they never escape this thread).
+    //   * Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
+      HConstructorFence::RemoveConstructorFences(new_instance);
+
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
+      // TODO: Delete constructor fences for new-array.
+      // In the future, HNewArray instructions will have HConstructorFences for them.
+      // HConstructorFence::RemoveConstructorFences(new_array);
+
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
         new_array->GetBlock()->RemoveInstruction(new_array);
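
The ordering in this hunk matters: a constructor fence is itself a user of the
allocation, so if the HasNonEnvironmentUses() test ran first, a dead
new-instance would still look live and survive. A toy model of that interaction
(illustrative types, not ART's real classes):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node*> users;  // instructions that consume this one
    };

    int main() {
      Node alloc;
      Node fence;
      alloc.users.push_back(&fence);  // the fence is the allocation's only user
      assert(!alloc.users.empty());   // tested first, the allocation looks live

      // Remove the fence first, as the new code above does...
      alloc.users.erase(std::remove(alloc.users.begin(), alloc.users.end(), &fence),
                        alloc.users.end());
      assert(alloc.users.empty());    // ...and the allocation is provably dead
      return 0;
    }
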
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 5a95abd..bbc55dd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -733,12 +733,6 @@
     }
     return true;
   } else if (instruction->IsArrayGet()) {
-    // Strings are different, with a different offset to the actual data
-    // and some compressed to save memory. For now, all cases are rejected
-    // to avoid the complexity.
-    if (instruction->AsArrayGet()->IsStringCharAt()) {
-      return false;
-    }
     // Accept a right-hand-side array base[index] for
     // (1) exact matching vector type,
     // (2) loop-invariant base,
@@ -839,17 +833,22 @@
     // TODO: accept symbolic, albeit loop invariant shift factors.
     HInstruction* opa = instruction->InputAt(0);
     HInstruction* opb = instruction->InputAt(1);
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
-      if (generate_code) {
-        // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
-        // Note that even the narrower SIMD shifts do the right thing after that.
-        int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
-            ? kMaxLongShiftDistance
-            : kMaxIntShiftDistance;
-        HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
-        GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+    int64_t value = 0;
+    if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
+      // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
+      int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
+          ? kMaxLongShiftDistance
+          : kMaxIntShiftDistance;
+      int64_t distance = value & mask;
+      // Restrict shift distance to packed data type width.
+      int64_t max_distance = Primitive::ComponentSize(type) * 8;
+      if (0 <= distance && distance < max_distance) {
+        if (generate_code) {
+          HInstruction* s = graph_->GetIntConstant(distance);
+          GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+        }
+        return true;
       }
-      return true;
     }
   } else if (instruction->IsInvokeStaticOrDirect()) {
     // Accept particular intrinsics.
@@ -1001,8 +1000,9 @@
       vector = new (global_allocator_) HVecStore(
           global_allocator_, org->InputAt(0), opa, opb, type, vector_length_);
     } else  {
+      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
       vector = new (global_allocator_) HVecLoad(
-          global_allocator_, org->InputAt(0), opa, type, vector_length_);
+          global_allocator_, org->InputAt(0), opa, type, vector_length_, is_string_char_at);
     }
   } else {
     // Scalar store or load.
@@ -1010,7 +1010,9 @@
     if (opb != nullptr) {
       vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
     } else  {
-      vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc);
+      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
+      vector = new (global_allocator_) HArrayGet(
+          org->InputAt(0), opa, type, kNoDexPc, is_string_char_at);
     }
   }
   vector_map_->Put(org, vector);
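
The reworked shift handling first masks the constant the way scalar Java shifts
do (mod 32 for int, mod 64 for long), then additionally rejects distances that
reach the packed lane width, since e.g. a 16-bit SIMD lane cannot express
x << 20. A worked check of both conditions (constants assumed to match ART's
usual kMaxIntShiftDistance = 31):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    constexpr int64_t kMaxIntShiftDistance = 31;  // scalar int shifts use the low 5 bits

    // Can a constant shift by `value` be vectorized for lanes of `lane_bytes`
    // bytes when the scalar operation has int type? (Long shifts mask with 63.)
    bool CanVectorizeIntShift(int64_t value, size_t lane_bytes) {
      int64_t distance = value & kMaxIntShiftDistance;  // sequential-shift semantics
      int64_t max_distance = static_cast<int64_t>(lane_bytes) * 8;
      return 0 <= distance && distance < max_distance;
    }

    int main() {
      assert(CanVectorizeIntShift(3, /*lane_bytes=*/ 2));    // char lanes: 3 < 16
      assert(!CanVectorizeIntShift(20, /*lane_bytes=*/ 2));  // 20 & 31 == 20 >= 16: reject
      assert(CanVectorizeIntShift(35, /*lane_bytes=*/ 4));   // 35 & 31 == 3 < 32: accept
      return 0;
    }
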
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca953a1..f250c1a 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -528,6 +528,15 @@
   return cached_current_method_;
 }
 
+const char* HGraph::GetMethodName() const {
+  const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+  return dex_file_.GetMethodName(method_id);
+}
+
+std::string HGraph::PrettyMethod(bool with_signature) const {
+  return dex_file_.PrettyMethod(method_idx_, with_signature);
+}
+
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -1150,6 +1159,81 @@
   }
 }
 
+void HVariableInputSizeInstruction::RemoveAllInputs() {
+  RemoveAsUserOfAllInputs();
+  DCHECK(!HasNonEnvironmentUses());
+
+  inputs_.clear();
+  DCHECK_EQ(0u, InputCount());
+}
+
+void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+  DCHECK(instruction->GetBlock() != nullptr);
+  // Removing constructor fences only makes sense for instructions with an object return type.
+  DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
+
+  // Efficient implementation that simultaneously (in one pass):
+  // * Scans the uses list for all constructor fences.
+  // * Deletes that constructor fence from the uses list of `instruction`.
+  // * Deletes `instruction` from the constructor fence's inputs.
+  // * Deletes the constructor fence if it now has 0 inputs.
+
+  const HUseList<HInstruction*>& uses = instruction->GetUses();
+  // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt.
+  for (auto it = uses.begin(), end = uses.end(); it != end; ) {
+    const HUseListNode<HInstruction*>& use_node = *it;
+    HInstruction* const use_instruction = use_node.GetUser();
+
+    // Advance the iterator immediately once we fetch the use_node.
+    // Warning: If the input is removed, the current iterator becomes invalid.
+    ++it;
+
+    if (use_instruction->IsConstructorFence()) {
+      HConstructorFence* ctor_fence = use_instruction->AsConstructorFence();
+      size_t input_index = use_node.GetIndex();
+
+      // Process the candidate instruction for removal
+      // from the graph.
+
+      // Constructor fence instructions are never
+      // used by other instructions.
+      //
+      // If we wanted to make this more generic, it
+      // could be a runtime if statement.
+      DCHECK(!ctor_fence->HasUses());
+
+      // A constructor fence's return type is "kPrimVoid"
+      // and therefore it can't have any environment uses.
+      DCHECK(!ctor_fence->HasEnvironmentUses());
+
+      // Remove the inputs first, otherwise removing the instruction
+      // will try to remove its uses while we are already removing uses
+      // and this operation will fail.
+      DCHECK_EQ(instruction, ctor_fence->InputAt(input_index));
+
+      // Removing the input will also remove the `use_node`.
+      // (Do not look at `use_node` after this; it will be a dangling reference.)
+      ctor_fence->RemoveInputAt(input_index);
+
+      // Once all inputs are removed, the fence is considered dead and
+      // is removed.
+      if (ctor_fence->InputCount() == 0u) {
+        ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+      }
+    }
+  }
+
+  if (kIsDebugBuild) {
+    // Post-condition checks:
+    // * None of the uses of `instruction` are a constructor fence.
+    // * The `instruction` itself did not get removed from a block.
+    for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) {
+      CHECK(!use_node.GetUser()->IsConstructorFence());
+    }
+    CHECK(instruction->GetBlock() != nullptr);
+  }
+}
+
 #define DEFINE_ACCEPT(name, super)                                             \
 void H##name::Accept(HGraphVisitor* visitor) {                                 \
   visitor->Visit##name(this);                                                  \
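
RemoveConstructorFences leans on one idiom worth calling out: fetch the current
use node, advance the iterator, and only then remove the input, because the
removal destroys the node the iterator was standing on. The same pattern in
miniature with a standard container (a sketch, not ART's intrusive HUseList):

    #include <cassert>
    #include <forward_list>

    int main() {
      std::forward_list<int> uses{1, 2, 3, 4};
      // Erase even elements while iterating: commit to the next position before
      // erasing, exactly as the loop above advances before RemoveInputAt.
      for (auto prev = uses.before_begin(), it = uses.begin(); it != uses.end(); ) {
        if (*it % 2 == 0) {
          it = uses.erase_after(prev);  // the old `it` would now dangle
        } else {
          prev = it;
          ++it;
        }
      }
      assert((uses == std::forward_list<int>{1, 3}));
      return 0;
    }
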
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8368026..e40361e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -46,6 +46,7 @@
 
 class GraphChecker;
 class HBasicBlock;
+class HConstructorFence;
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
@@ -57,6 +58,7 @@
 class HInvoke;
 class HLongConstant;
 class HNullConstant;
+class HParameterValue;
 class HPhi;
 class HSuspendCheck;
 class HTryBoundary;
@@ -537,6 +539,12 @@
     return method_idx_;
   }
 
+  // Get the method name (without the signature), e.g. "<init>".
+  const char* GetMethodName() const;
+
+  // Get the pretty method name (class + name + optionally the signature).
+  std::string PrettyMethod(bool with_signature = true) const;
+
   InvokeType GetInvokeType() const {
     return invoke_type_;
   }
@@ -1297,6 +1305,7 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
+  M(ConstructorFence, Instruction)                                      \
   M(CurrentMethod, Instruction)                                         \
   M(ShouldDeoptimizeFlag, Instruction)                                  \
   M(Deoptimize, Instruction)                                            \
@@ -1476,8 +1485,11 @@
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
+  // Get the instruction which has this use as one of the inputs.
   T GetUser() const { return user_; }
+  // Get the position of the input record that this use corresponds to.
   size_t GetIndex() const { return index_; }
+  // Set the position of the input record that this use corresponds to.
   void SetIndex(size_t index) { index_ = index; }
 
   // Hook for the IntrusiveForwardList<>.
@@ -2037,7 +2049,8 @@
         !IsNativeDebugInfo() &&
         !IsParameterValue() &&
         // If we added an explicit barrier then we should keep it.
-        !IsMemoryBarrier();
+        !IsMemoryBarrier() &&
+        !IsConstructorFence();
   }
 
   bool IsDeadAndRemovable() const {
@@ -2431,6 +2444,11 @@
   void InsertInputAt(size_t index, HInstruction* input);
   void RemoveInputAt(size_t index);
 
+  // Removes all the inputs.
+  // Also removes this instruction from each input's use list
+  // (for non-environment uses only).
+  void RemoveAllInputs();
+
  protected:
   HVariableInputSizeInstruction(SideEffects side_effects,
                                 uint32_t dex_pc,
@@ -5069,7 +5087,7 @@
   const DexFile& GetDexFile() const { return dex_file_; }
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
-  bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); }
+  bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
 
   bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
   void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
@@ -5377,10 +5395,16 @@
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: We can be smarter here.
-    // Currently, the array access is always preceded by an ArrayLength or a NullCheck
-    // which generates the implicit null check. There are cases when these can be removed
-    // to produce better code. If we ever add optimizations to do so we should allow an
-    // implicit check here (as long as the address falls in the first page).
+    // Currently, unless the array is the result of NewArray, the array access is always
+    // preceded by some form of null NullCheck necessary for the bounds check, usually
+    // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for
+    // dynamic BCE. There are cases when these could be removed to produce better code.
+    // If we ever add optimizations to do so we should allow an implicit check here
+    // (as long as the address falls in the first page).
+    //
+    // As an example of such fancy optimization, we could eliminate BoundsCheck for
+    //     a = cond ? new int[1] : null;
+    //     a[0];  // The Phi does not need bounds check for either input.
     return false;
   }
 
@@ -6501,6 +6525,137 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
+// A constructor fence orders all prior stores to fields that could be accessed via a final field of
+// the specified object(s), with respect to any subsequent store that might "publish"
+// (i.e. make visible) the specified object to another thread.
+//
+// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens
+// for all final fields (that were set) at the end of the invoked constructor.
+//
+// The constructor fence models the freeze actions for the final fields of an object
+// being constructed (semantically at the end of the constructor). Constructor fences
+// have a per-object affinity; two separate objects being constructed get two separate
+// constructor fences.
+//
+// (Note that if calling a super-constructor or forwarding to another constructor,
+// the freezes would happen at the end of *that* constructor being invoked).
+//
+// The memory model guarantees that when the object being constructed is "published" after
+// constructor completion (i.e. escapes the current thread via a store), then any final field
+// writes must be observable on other threads (once they observe that publication).
+//
+// Further, anything written before the freeze, and read by dereferencing through the final field,
+// must also be visible (so final object field could itself have an object with non-final fields;
+// yet the freeze must also extend to them).
+//
+// Constructor example:
+//
+//     class HasFinal {
+//        final int field;                              Optimizing IR for <init>()V:
+//        HasFinal() {
+//          field = 123;                                HInstanceFieldSet(this, HasFinal.field, 123)
+//          // freeze(this.field);                      HConstructorFence(this)
+//        }                                             HReturn
+//     }
+//
+// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of
+// already-initialized classes; in that case the allocation must act as a "default-initializer"
+// of the object, which effectively writes the class pointer "final field".
+//
+// For example, we can model default-initialization as roughly the equivalent of the following:
+//
+//     class Object {
+//       private final Class header;
+//     }
+//
+//  Java code:                                           Optimizing IR:
+//
+//     T new_instance<T>() {
+//       Object obj = allocate_memory(T.class.size);     obj = HInvoke(art_quick_alloc_object, T)
+//       obj.header = T.class;                           // header write is done by above call.
+//       // freeze(obj.header)                           HConstructorFence(obj)
+//       return (T)obj;
+//     }
+//
+// See also:
+// * CompilerDriver::RequiresConstructorBarrier
+// * QuasiAtomic::ThreadFenceForConstructor
+//
+class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+                                  // A fence has variable inputs because the inputs can be removed
+                                  // after prepare_for_register_allocation phase.
+                                  // (TODO: In the future a fence could freeze multiple objects
+                                  //        after merging two fences together.)
+ public:
+  // `fence_object` is the reference that needs to be protected for correct publication.
+  //
+  // It makes sense in the following situations:
+  // * <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true).
+  // * new-instance-like instructions, it's the return value (i.e. HNewInstance).
+  //
+  // After construction the `fence_object` becomes the 0th input.
+  // This is not an input in a real sense, but just a convenient place to stash the information
+  // about the associated object.
+  HConstructorFence(HInstruction* fence_object,
+                    uint32_t dex_pc,
+                    ArenaAllocator* arena)
+    // We strongly suspect there is not a more accurate way to describe the fine-grained reordering
+    // constraints described in the class header. We claim that these SideEffects constraints
+    // enforce a superset of the real constraints.
+    //
+    // The ordering described above is conservatively modeled with SideEffects as follows:
+    //
+    // * To prevent reordering of the publication stores:
+    // ----> "Reads of objects" is the initial SideEffect.
+    // * For every primitive final field store in the constructor:
+    // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect.
+    // * If there are any stores to reference final fields in the constructor:
+    // ----> Use a more conservative "AllReads" SideEffect because any stores to any references
+    //       that are reachable from `fence_object` also need to be prevented for reordering
+    //       (and we do not want to do alias analysis to figure out what those stores are).
+    //
+    // In the implementation, this initially starts out as an "all reads" side effect; this is an
+    // even more conservative approach than the one described above, and prevents all of the
+    // above reordering without analyzing any of the instructions in the constructor.
+    //
+    // If in a later phase we discover that there are no writes to reference final fields,
+    // we can refine the side effect to a smaller set of type reads (see above constraints).
+      : HVariableInputSizeInstruction(SideEffects::AllReads(),
+                                      dex_pc,
+                                      arena,
+                                      /* number_of_inputs */ 1,
+                                      kArenaAllocConstructorFenceInputs) {
+    DCHECK(fence_object != nullptr);
+    SetRawInputAt(0, fence_object);
+  }
+
+  // The object associated with this constructor fence.
+  //
+  // (Note: This will be null after the prepare_for_register_allocation phase,
+  // as all constructor fence inputs are removed there).
+  HInstruction* GetFenceObject() const {
+    return InputAt(0);
+  }
+
+  // Find all the HConstructorFence uses (`fence_use`) for `this` and:
+  // - Delete `fence_use` from `this`'s use list.
+  // - Delete `this` from `fence_use`'s inputs list.
+  // - If the `fence_use` is dead, remove it from the graph.
+  //
+  // A fence is considered dead once it no longer has any uses
+  // and all of the inputs are dead.
+  //
+  // This must *not* be called during/after prepare_for_register_allocation,
+  // because that removes all the inputs to the fences but the fence is actually
+  // still considered live.
+  static void RemoveConstructorFences(HInstruction* instruction);
+
+  DECLARE_INSTRUCTION(ConstructorFence);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HConstructorFence);
+};
+
 class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
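
Outside the compiler, the ordering HConstructorFence models is what a release
fence gives you in C++: all stores to the object's fields become visible before
the store that publishes the object. A portable analogy of the contract (an
illustration only; ART lowers the fence to a StoreStore barrier rather than
these library calls):

    #include <atomic>
    #include <cassert>
    #include <thread>

    struct HasFinal { int field = 0; };

    std::atomic<HasFinal*> published{nullptr};

    void Construct() {
      HasFinal* obj = new HasFinal();
      obj->field = 123;                                     // final-field store
      std::atomic_thread_fence(std::memory_order_release);  // the "constructor fence"
      published.store(obj, std::memory_order_relaxed);      // publication store
    }

    void Read() {
      HasFinal* obj;
      while ((obj = published.load(std::memory_order_relaxed)) == nullptr) {}
      std::atomic_thread_fence(std::memory_order_acquire);
      assert(obj->field == 123);  // guaranteed once the publication is observed
      delete obj;
    }

    int main() {
      std::thread writer(Construct);
      std::thread reader(Read);
      writer.join();
      reader.join();
      return 0;
    }
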
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index fb9dfb7..52c247b 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -98,7 +98,7 @@
 
   DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
 
- private:
+ protected:
   // Additional packed bits.
   static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
   static constexpr size_t kFieldTypeSize =
@@ -107,6 +107,7 @@
   static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
   using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
 
+ private:
   const size_t vector_length_;
 
   DISALLOW_COPY_AND_ASSIGN(HVecOperation);
@@ -191,6 +192,24 @@
   DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
 };
 
+// Packed type consistency checker (integral types of the same vector length may mix freely).
+inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) {
+  DCHECK(input->IsVecOperation());
+  Primitive::Type input_type = input->AsVecOperation()->GetPackedType();
+  switch (input_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return type == Primitive::kPrimBoolean ||
+             type == Primitive::kPrimByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return type == Primitive::kPrimChar ||
+             type == Primitive::kPrimShort;
+    default:
+      return type == input_type;
+  }
+}
+
 //
 // Definitions of concrete unary vector operations in HIR.
 //
@@ -221,8 +240,7 @@
                 size_t vector_length,
                 uint32_t dex_pc = kNoDexPc)
       : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
-    DCHECK(input->IsVecOperation());
-    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(input, packed_type));
   }
 
   // TODO: probably integral promotion
@@ -244,7 +262,7 @@
           uint32_t dex_pc = kNoDexPc)
       : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
-    DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type);  // actual convert
+    DCHECK_NE(GetInputType(), GetResultType());  // actual convert
   }
 
   Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
@@ -266,8 +284,7 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
-    DCHECK(input->IsVecOperation());
-    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(input, packed_type));
   }
   DECLARE_INSTRUCTION(VecNeg);
  private:
@@ -284,8 +301,7 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
-    DCHECK(input->IsVecOperation());
-    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(input, packed_type));
   }
   DECLARE_INSTRUCTION(VecAbs);
  private:
@@ -325,9 +341,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecAdd);
  private:
@@ -348,22 +363,24 @@
                  bool is_unsigned,
                  bool is_rounded,
                  uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc),
-        is_unsigned_(is_unsigned),
-        is_rounded_(is_rounded) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
+    SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned);
+    SetPackedFlag<kFieldHAddIsRounded>(is_rounded);
   }
 
-  bool IsUnsigned() const { return is_unsigned_; }
-  bool IsRounded() const { return is_rounded_; }
+  bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); }
+  bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); }
 
   DECLARE_INSTRUCTION(VecHalvingAdd);
 
  private:
-  bool is_unsigned_;
-  bool is_rounded_;
+  // Additional packed bits.
+  static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1;
+  static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1;
+  static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
 
   DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd);
 };
@@ -379,9 +396,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecSub);
  private:
@@ -399,9 +415,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecMul);
  private:
@@ -419,9 +434,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecDiv);
  private:
@@ -439,9 +453,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecMin);
  private:
@@ -459,9 +472,8 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
+    DCHECK(HasConsistentPackedTypes(right, packed_type));
   }
   DECLARE_INSTRUCTION(VecMax);
  private:
@@ -551,8 +563,7 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
   }
   DECLARE_INSTRUCTION(VecShl);
  private:
@@ -570,8 +581,7 @@
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
   }
   DECLARE_INSTRUCTION(VecShr);
  private:
@@ -589,8 +599,7 @@
            size_t vector_length,
            uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
-    DCHECK(left->IsVecOperation());
-    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(left, packed_type));
   }
   DECLARE_INSTRUCTION(VecUShr);
  private:
@@ -646,12 +655,9 @@
                       dex_pc),
         op_kind_(op) {
     DCHECK(op == InstructionKind::kAdd || op == InstructionKind::kSub);
-    DCHECK(accumulator->IsVecOperation());
-    DCHECK(mul_left->IsVecOperation() && mul_right->IsVecOperation());
-    DCHECK_EQ(accumulator->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(mul_left->AsVecOperation()->GetPackedType(), packed_type);
-    DCHECK_EQ(mul_right->AsVecOperation()->GetPackedType(), packed_type);
-
+    DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
+    DCHECK(HasConsistentPackedTypes(mul_left, packed_type));
+    DCHECK(HasConsistentPackedTypes(mul_right, packed_type));
     SetRawInputAt(kInputAccumulatorIndex, accumulator);
     SetRawInputAt(kInputMulLeftIndex, mul_left);
     SetRawInputAt(kInputMulRightIndex, mul_right);
@@ -687,6 +693,7 @@
            HInstruction* index,
            Primitive::Type packed_type,
            size_t vector_length,
+           bool is_string_char_at,
            uint32_t dex_pc = kNoDexPc)
       : HVecMemoryOperation(arena,
                             packed_type,
@@ -696,9 +703,18 @@
                             dex_pc) {
     SetRawInputAt(0, base);
     SetRawInputAt(1, index);
+    SetPackedFlag<kFieldIsStringCharAt>(is_string_char_at);
   }
   DECLARE_INSTRUCTION(VecLoad);
+
+  bool IsStringCharAt() const { return GetPackedFlag<kFieldIsStringCharAt>(); }
+
  private:
+  // Additional packed bits.
+  static constexpr size_t kFieldIsStringCharAt = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kNumberOfVecLoadPackedBits = kFieldIsStringCharAt + 1;
+  static_assert(kNumberOfVecLoadPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HVecLoad);
 };
 
@@ -719,8 +735,7 @@
                             /* number_of_inputs */ 3,
                             vector_length,
                             dex_pc) {
-    DCHECK(value->IsVecOperation());
-    DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK(HasConsistentPackedTypes(value, packed_type));
     SetRawInputAt(0, base);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
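
HasConsistentPackedTypes encodes a size-based equivalence: boolean/byte lanes
are interchangeable, as are char/short lanes, while everything else must match
exactly. A standalone restatement with the same switch shape (the enum is a
stand-in for Primitive::Type):

    #include <cassert>

    enum class Type { kBool, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble };

    bool HasConsistentPackedTypes(Type input_type, Type type) {
      switch (input_type) {
        case Type::kBool:
        case Type::kByte:
          return type == Type::kBool || type == Type::kByte;
        case Type::kChar:
        case Type::kShort:
          return type == Type::kChar || type == Type::kShort;
        default:
          return type == input_type;
      }
    }

    int main() {
      assert(HasConsistentPackedTypes(Type::kByte, Type::kBool));   // 8-bit lanes mix
      assert(HasConsistentPackedTypes(Type::kChar, Type::kShort));  // 16-bit lanes mix
      assert(!HasConsistentPackedTypes(Type::kInt, Type::kShort));  // sizes differ
      assert(HasConsistentPackedTypes(Type::kFloat, Type::kFloat));
      return 0;
    }
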
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 66bfea9..c3c141b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -167,6 +167,13 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  // Delete all the inputs to the constructor fence;
+  // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
+  // LocationSummary in the LocationsBuilder.
+  constructor_fence->RemoveAllInputs();
+}
+
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
     HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 7ffbe44..395d4ba 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -43,6 +43,7 @@
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
+  void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0ed8a35..0f24e81 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -652,6 +652,9 @@
   virtual void blx(Register rm, Condition cond = AL) = 0;
   virtual void bx(Register rm, Condition cond = AL) = 0;
 
+  // ADR instruction loading a register for branching to the label.
+  virtual void AdrCode(Register rt, Label* label) = 0;
+
   // Memory barriers.
   virtual void dmb(DmbOptions flavor) = 0;
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 1e71d06..d7096b3 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -214,14 +214,14 @@
   DCHECK_GE(dest_end, src_end);
   for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) {
     Fixup* fixup = &*i;
+    size_t old_fixup_location = fixup->GetLocation();
     if (fixup->GetOriginalSize() == fixup->GetSize()) {
       // The size of this Fixup didn't change. To avoid moving the data
       // in small chunks, emit the code to its original position.
-      fixup->Emit(&buffer_, adjusted_code_size);
       fixup->Finalize(dest_end - src_end);
+      fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size);
     } else {
       // Move the data between the end of the fixup and src_end to its final location.
-      size_t old_fixup_location = fixup->GetLocation();
       size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes();
       size_t data_size = src_end - src_begin;
       size_t dest_begin  = dest_end - data_size;
@@ -230,7 +230,7 @@
       dest_end = dest_begin - fixup->GetSizeInBytes();
       // Finalize the Fixup and emit the data to the new location.
       fixup->Finalize(dest_end - src_end);
-      fixup->Emit(&buffer_, adjusted_code_size);
+      fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size);
     }
   }
   CHECK_EQ(src_end, dest_end);
@@ -1895,6 +1895,9 @@
     case kCbxz48Bit:
       return 6u;
 
+    case kCodeAddr4KiB:
+      return 4u;
+
     case kLiteral1KiB:
       return 2u;
     case kLiteral4KiB:
@@ -1973,6 +1976,15 @@
       diff -= 2;        // Extra CMP Rn, #0, 16-bit.
       break;
 
+    case kCodeAddr4KiB:
+      // The ADR instruction rounds down the PC+4 to a multiple of 4, so if PC+4
+      // isn't already a multiple of 4, we need to adjust.
+      DCHECK_ALIGNED(diff, 2);
+      diff += location_ & 2;
+      // Add the Thumb mode bit.
+      diff += 1;
+      break;
+
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLongOrFPLiteral1KiB:
@@ -1987,8 +1999,8 @@
       diff = diff + (diff & 2);
       DCHECK_GE(diff, 0);
       break;
-    case kLiteral1MiB:
     case kLiteral64KiB:
+    case kLiteral1MiB:
     case kLongOrFPLiteral64KiB:
     case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
@@ -2041,6 +2053,10 @@
       // We don't support conditional branches beyond +-1MiB.
       return true;
 
+    case kCodeAddr4KiB:
+      // ADR uses the aligned PC and as such the offset cannot be calculated early.
+      return false;
+
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLiteral64KiB:
@@ -2087,6 +2103,10 @@
       // We don't support conditional branches beyond +-1MiB.
       break;
 
+    case kCodeAddr4KiB:
+      // We don't support Code address ADR beyond +4KiB.
+      break;
+
     case kLiteral1KiB:
       DCHECK(!IsHighRegister(rn_));
       if (IsUint<10>(GetOffset(current_code_size))) {
@@ -2159,13 +2179,15 @@
   return current_code_size - old_code_size;
 }
 
-void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const {
+void Thumb2Assembler::Fixup::Emit(uint32_t emit_location,
+                                  AssemblerBuffer* buffer,
+                                  uint32_t code_size) const {
   switch (GetSize()) {
     case kBranch16Bit: {
       DCHECK(type_ == kUnconditional || type_ == kConditional);
       DCHECK_EQ(type_ == kConditional, cond_ != AL);
       int16_t encoding = BEncoding16(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kBranch32Bit: {
@@ -2180,15 +2202,15 @@
         DCHECK_NE(encoding & B12, 0);
         encoding ^= B14 | B12;
       }
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
 
     case kCbxz16Bit: {
       DCHECK(type_ == kCompareAndBranchXZero);
       int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kCbxz32Bit: {
@@ -2196,8 +2218,8 @@
       DCHECK(cond_ == EQ || cond_ == NE);
       int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
       int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, cmp_encoding);
-      buffer->Store<int16_t>(location_ + 2, b_encoding);
+      buffer->Store<int16_t>(emit_location, cmp_encoding);
+      buffer->Store<int16_t>(emit_location + 2, b_encoding);
       break;
     }
     case kCbxz48Bit: {
@@ -2205,24 +2227,32 @@
       DCHECK(cond_ == EQ || cond_ == NE);
       int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
       int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, cmp_encoding);
-      buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, cmp_encoding);
+      buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+      break;
+    }
+
+    case kCodeAddr4KiB: {
+      DCHECK(type_ == kLoadCodeAddr);
+      int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
 
     case kLiteral1KiB: {
       DCHECK(type_ == kLoadLiteralNarrow);
       int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kLiteral4KiB: {
       DCHECK(type_ == kLoadLiteralNarrow);
       // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly.
       int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLiteral64KiB: {
@@ -2242,11 +2272,11 @@
       int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
       int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff);
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
     case kLiteralFar: {
@@ -2256,36 +2286,36 @@
       int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
       int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0);
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
 
     case kLiteralAddr1KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kLiteralAddr4KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLiteralAddr64KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
       break;
     }
     case kLiteralAddrFar: {
@@ -2294,29 +2324,29 @@
       int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
       int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
       break;
     }
 
     case kLongOrFPLiteral1KiB: {
       int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size));  // DCHECKs type_.
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLongOrFPLiteral64KiB: {
       int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
       int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u);    // DCHECKs type_.
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
     case kLongOrFPLiteralFar: {
@@ -2325,13 +2355,13 @@
       int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
       int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0);                 // DCHECKs type_.
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
   }
@@ -3331,6 +3361,19 @@
 }
 
 
+void Thumb2Assembler::AdrCode(Register rt, Label* label) {
+  uint32_t pc = buffer_.Size();
+  FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt));
+  CHECK(!label->IsBound());
+  // ADR target must be an unbound label. Add it to a singly-linked list maintained within
+  // the code with the label serving as the head.
+  Emit16(static_cast<uint16_t>(label->position_));
+  label->LinkTo(branch_id);
+  Emit16(0);
+  DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes());
+}
+
+
 void Thumb2Assembler::Push(Register rd, Condition cond) {
   str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
 }
@@ -3405,7 +3448,7 @@
         break;
       }
     }
-    last_fixup.Emit(&buffer_, buffer_.Size());
+    last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size());
     fixups_.pop_back();
   }
 }
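
The kCodeAddr4KiB offset math is subtle because Thumb ADR computes against
AlignDown(PC + 4, 4) rather than PC + 4, and the loaded value must also carry
the Thumb interworking bit. A worked restatement of the adjustment in the
kCodeAddr4KiB case above (pure arithmetic, no assembler state; names are
illustrative):

    #include <cassert>
    #include <cstdint>

    // Offset stored into the ADR encoding so that `target | 1` (Thumb bit set)
    // ends up in the register. `location` is the address of the ADR itself.
    int32_t AdrCodeOffset(uint32_t location, uint32_t target) {
      int32_t diff = static_cast<int32_t>(target - (location + 4u));  // naive PC+4 offset
      diff += location & 2u;  // compensate: ADR uses AlignDown(PC + 4, 4)
      diff += 1;              // add the Thumb mode bit
      return diff;
    }

    int main() {
      // ADR at a 4-byte-aligned location: PC + 4 is already a multiple of 4.
      assert(AdrCodeOffset(0x1000, 0x1010) == 0x0c + 1);
      // ADR at 0x1002: AlignDown(0x1006, 4) == 0x1004, so add back the 2.
      assert(AdrCodeOffset(0x1002, 0x1010) == (0x1010 - 0x1006) + 2 + 1);
      return 0;
    }
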
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 1c495aa..5c36110 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -268,6 +268,9 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
+  // ADR instruction loading a register for branching to the label, including the Thumb mode bit.
+  void AdrCode(Register rt, Label* label) OVERRIDE;
+
   virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
   virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
@@ -377,6 +380,10 @@
     force_32bit_ = true;
   }
 
+  void Allow16Bit() {
+    force_32bit_ = false;
+  }
+
   // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
   // will generate a fixup.
   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
@@ -422,6 +429,7 @@
       kUnconditionalLink,         // BL.
       kUnconditionalLinkX,        // BLX.
       kCompareAndBranchXZero,     // cbz/cbnz.
+      kLoadCodeAddr,              // Get address of a code label, used for Baker read barriers.
       kLoadLiteralNarrow,         // Load narrow integer literal.
       kLoadLiteralWide,           // Load wide integer literal.
       kLoadLiteralAddr,           // Load address of literal (used for jump table).
@@ -442,6 +450,10 @@
       kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
       kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
 
+      // ADR variants.
+      kCodeAddr4KiB,  // ADR rX, <label>; label must be after the ADR but within 4KiB range.
+                      // Multi-instruction expansion is not supported.
+
       // Load integer literal variants.
       // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
       kLiteral1KiB,
@@ -492,6 +504,12 @@
                    cond, kCompareAndBranchXZero, kCbxz16Bit, location);
     }
 
+    // Code address.
+    static Fixup LoadCodeAddress(uint32_t location, Register rt) {
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadCodeAddr, kCodeAddr4KiB, location);
+    }
+
     // Load narrow literal.
     static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
@@ -550,6 +568,7 @@
       switch (GetOriginalSize()) {
         case kBranch32Bit:
         case kCbxz48Bit:
+        case kCodeAddr4KiB:
         case kLiteralFar:
         case kLiteralAddrFar:
         case kLongOrFPLiteralFar:
@@ -623,7 +642,7 @@
 
     // Emit the branch instruction into the assembler buffer.  This does the
     // encoding into the thumb instruction.
-    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
+    void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const;
 
    private:
     Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
@@ -903,6 +922,24 @@
   FixupId last_fixup_id_;
 };
 
+class ScopedForce32Bit {
+ public:
+  explicit ScopedForce32Bit(Thumb2Assembler* assembler)
+      : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
+    assembler->Force32Bit();
+  }
+
+  ~ScopedForce32Bit() {
+    if (!old_force_32bit_) {
+      assembler_->Allow16Bit();
+    }
+  }
+
+ private:
+  Thumb2Assembler* const assembler_;
+  const bool old_force_32bit_;
+};
+
 }  // namespace arm
 }  // namespace art
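
As a usage sketch: the ScopedForce32Bit guard above forces 32-bit Thumb2 encodings for everything emitted in its scope, which makes the byte size of the emitted sequence predictable (useful for patchable code such as the Baker read barrier sites this change targets), and restores the previous setting on scope exit. EmitSequence below is a hypothetical stand-in for real assembler calls:

    // Hypothetical helper; stands in for real assembler calls.
    void EmitSequence(Thumb2Assembler* assembler);

    void EmitPatchableSequence(Thumb2Assembler* assembler) {
      ScopedForce32Bit force_32bit(assembler);
      // Everything emitted in this scope uses 32-bit encodings, so the byte
      // size of the sequence is statically predictable.
      EmitSequence(assembler);
    }  // Scope exit restores the previous force-32-bit setting.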
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 57223b5..f4afb33 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1356,6 +1356,106 @@
   EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
 }
 
+void Mips64Assembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10);
+}
+
 void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
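
All of the instructions added above funnel into EmitMsa3R with a 3-bit operation selector, a 2-bit data-format field (b/h/w/d), three vector registers, and a 6-bit minor opcode. Assuming the standard MSA 3R instruction layout, the encoding reduces to field packing; this sketch is illustrative, not the actual EmitMsa3R:

    #include <cstdint>

    // Assumed MSA 3R field layout:
    // | 31-26 MSA major | 25-23 op | 22-21 df | 20-16 wt | 15-11 ws | 10-6 wd | 5-0 minor |
    uint32_t EncodeMsa3R(uint32_t operation, uint32_t df,
                         uint32_t wt, uint32_t ws, uint32_t wd,
                         uint32_t minor_opcode) {
      constexpr uint32_t kMsaMajorOpcode = 0x1e;  // Assumed MSA major opcode.
      return (kMsaMajorOpcode << 26) | (operation << 23) | (df << 21) |
             (wt << 16) | (ws << 11) | (wd << 6) | minor_opcode;
    }

Under that layout, Add_aB above corresponds to EncodeMsa3R(0x0, 0x0, wt, ws, wd, 0x10), matching the argument order of the calls.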
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 666c693..6ac3361 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -682,6 +682,26 @@
   void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
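
Semantically (assuming the usual MSA definitions), add_a adds element-wise absolute values, ave_s/ave_u compute a truncated element-wise average, and aver_s/aver_u a rounded one. Scalar models of one signed byte lane, with the arithmetic widened to avoid overflow; these are illustrative models, not ART code:

    #include <cstdint>
    #include <cstdlib>

    int8_t AddA(int8_t a, int8_t b) {   // add_a.b: |a| + |b|, truncated to the lane width.
      return static_cast<int8_t>(std::abs(static_cast<int>(a)) + std::abs(static_cast<int>(b)));
    }
    int8_t AveS(int8_t a, int8_t b) {   // ave_s.b: truncated signed average.
      return static_cast<int8_t>((static_cast<int16_t>(a) + b) >> 1);
    }
    int8_t AverS(int8_t a, int8_t b) {  // aver_s.b: rounded signed average (adds 1 before the shift).
      return static_cast<int8_t>((static_cast<int16_t>(a) + b + 1) >> 1);
    }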
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index f2e3b16..084ce6f 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -2668,6 +2668,106 @@
             "mod_u.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Add_aB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.d");
+}
+
 TEST_F(AssemblerMIPS64Test, FaddW) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
             "fadd.w");
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index c286b82..0ff9fc6 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -78,7 +78,7 @@
     mutable FreeByStartSet::const_iterator free_by_start_entry;
   };
   struct FreeBySizeComparator {
-    bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) {
+    bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const {
       if (lhs.size != rhs.size) {
         return lhs.size < rhs.size;
       } else {
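
The added const on FreeBySizeComparator::operator() matters because standard containers invoke their stored comparator from const member functions; without it, lookups through a const container can fail to compile. A minimal reproduction of the pattern, with hypothetical names:

    #include <cstddef>
    #include <set>

    struct Entry { size_t size; const void* ptr; };

    struct BySize {
      // The trailing const is the point: std::set calls the stored comparator
      // through const paths (e.g. in const lookups).
      bool operator()(const Entry& a, const Entry& b) const {
        return a.size != b.size ? a.size < b.size : a.ptr < b.ptr;
      }
    };

    size_t CountEntries(const std::set<Entry, BySize>& s, const Entry& e) {
      return s.count(e);  // Needs a const-callable comparator.
    }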
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 5090c11..58f3948 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -268,20 +268,17 @@
   UsageError("      Default: Optimizing");
   UsageError("");
   UsageError("  --compiler-filter="
-                "(verify-none"
-                "|verify-at-runtime"
-                "|verify-profile"
-                "|interpret-only"
-                "|time"
+                "(assume-verified"
+                "|extract"
+                "|verify"
+                "|quicken"
                 "|space-profile"
                 "|space"
-                "|balanced"
                 "|speed-profile"
                 "|speed"
                 "|everything-profile"
                 "|everything):");
   UsageError("      select compiler filter.");
-  UsageError("      verify-profile requires a --profile(-fd) to also be passed in.");
   UsageError("      Example: --compiler-filter=everything");
   UsageError("      Default: speed");
   UsageError("");
@@ -352,23 +349,23 @@
   UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
   UsageError("      Cannot be used together with --profile-file.");
   UsageError("");
-  UsageError("  --swap-file=<file-name>:  specifies a file to use for swap.");
+  UsageError("  --swap-file=<file-name>: specifies a file to use for swap.");
   UsageError("      Example: --swap-file=/data/tmp/swap.001");
   UsageError("");
-  UsageError("  --swap-fd=<file-descriptor>:  specifies a file to use for swap (by descriptor).");
+  UsageError("  --swap-fd=<file-descriptor>: specifies a file to use for swap (by descriptor).");
   UsageError("      Example: --swap-fd=10");
   UsageError("");
-  UsageError("  --swap-dex-size-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("  --swap-dex-size-threshold=<size>: specifies the minimum total dex file size in");
   UsageError("      bytes to allow the use of swap.");
   UsageError("      Example: --swap-dex-size-threshold=1000000");
   UsageError("      Default: %zu", kDefaultMinDexFileCumulativeSizeForSwap);
   UsageError("");
-  UsageError("  --swap-dex-count-threshold=<count>:  specifies the minimum number of dex files to");
+  UsageError("  --swap-dex-count-threshold=<count>: specifies the minimum number of dex files to");
   UsageError("      allow the use of swap.");
   UsageError("      Example: --swap-dex-count-threshold=10");
   UsageError("      Default: %zu", kDefaultMinDexFilesForSwap);
   UsageError("");
-  UsageError("  --very-large-app-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("  --very-large-app-threshold=<size>: specifies the minimum total dex file size in");
   UsageError("      bytes to consider the input \"very large\" and punt on the compilation.");
   UsageError("      Example: --very-large-app-threshold=100000000");
   UsageError("");
@@ -383,6 +380,14 @@
   UsageError("");
   UsageError("  --force-determinism: force the compiler to emit a deterministic output.");
   UsageError("");
+  UsageError("  --dump-cfg=<cfg-file>: dump control-flow graphs (CFGs) to specified file.");
+  UsageError("      Example: --dump-cfg=output.cfg");
+  UsageError("");
+  UsageError("  --dump-cfg-append: when dumping CFGs to an existing file, append new CFG data to");
+  UsageError("      existing data (instead of overwriting existing data with new data, which is");
+  UsageError("      the default behavior). This option is only meaningful when used with");
+  UsageError("      --dump-cfg.");
+  UsageError("");
   UsageError("  --classpath-dir=<directory-path>: directory used to resolve relative class paths.");
   UsageError("");
   std::cerr << "See log for usage error information\n";
@@ -721,6 +726,10 @@
       Usage("Can't have both --input-vdex-fd and --input-vdex");
     }
 
+    if (output_vdex_fd_ != -1 && !output_vdex_.empty()) {
+      Usage("Can't have both --output-vdex-fd and --output-vdex");
+    }
+
     if (!oat_filenames_.empty() && oat_fd_ != -1) {
       Usage("--oat-file should not be used with --oat-fd");
     }
@@ -1125,6 +1134,8 @@
         ParseInputVdexFd(option);
       } else if (option.starts_with("--input-vdex=")) {
         input_vdex_ = option.substr(strlen("--input-vdex=")).data();
+      } else if (option.starts_with("--output-vdex=")) {
+        output_vdex_ = option.substr(strlen("--output-vdex=")).data();
       } else if (option.starts_with("--output-vdex-fd=")) {
         ParseOutputVdexFd(option);
       } else if (option.starts_with("--oat-file=")) {
@@ -1260,6 +1271,7 @@
     }
 
     // OAT and VDEX file handling
+    bool eagerly_unquicken_vdex = DoDexLayoutOptimizations();
 
     if (oat_fd_ == -1) {
       DCHECK(!oat_filenames_.empty());
@@ -1281,12 +1293,15 @@
           input_vdex_file_ = VdexFile::Open(input_vdex_,
                                             /* writable */ false,
                                             /* low_4gb */ false,
+                                            eagerly_unquicken_vdex,
                                             &error_msg);
         }
 
         DCHECK_EQ(output_vdex_fd_, -1);
-        std::string vdex_filename = ReplaceFileExtension(oat_filename, "vdex");
-        if (vdex_filename == input_vdex_) {
+        std::string vdex_filename = output_vdex_.empty()
+            ? ReplaceFileExtension(oat_filename, "vdex")
+            : output_vdex_;
+        if (vdex_filename == input_vdex_ && output_vdex_.empty()) {
           update_input_vdex_ = true;
           std::unique_ptr<File> vdex_file(OS::OpenFileReadWrite(vdex_filename.c_str()));
           vdex_files_.push_back(std::move(vdex_file));
@@ -1328,6 +1343,7 @@
                                             "vdex",
                                             /* writable */ false,
                                             /* low_4gb */ false,
+                                            eagerly_unquicken_vdex,
                                             &error_msg);
           // If there's any problem with the passed vdex, just warn and proceed
           // without it.
@@ -1570,14 +1586,14 @@
 
     // If we need to downgrade the compiler-filter for size reasons, do that check now.
     if (!IsBootImage() && IsVeryLarge(dex_files_)) {
-      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime,
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kExtract,
                                       compiler_options_->GetCompilerFilter())) {
-        LOG(INFO) << "Very large app, downgrading to verify-at-runtime.";
+        LOG(INFO) << "Very large app, downgrading to extract.";
         // Note: this change won't be reflected in the key-value store, as that had to be
         //       finalized before loading the dex files. This setup is currently required
         //       to get the size from the DexFile objects.
         // TODO: refactor. b/29790079
-        compiler_options_->SetCompilerFilter(CompilerFilter::kVerifyAtRuntime);
+        compiler_options_->SetCompilerFilter(CompilerFilter::kExtract);
       }
     }
 
@@ -2085,17 +2101,13 @@
   }
 
   bool DoProfileGuidedOptimizations() const {
-    return UseProfile() && compiler_options_->GetCompilerFilter() != CompilerFilter::kVerifyProfile;
+    return UseProfile();
   }
 
   bool DoDexLayoutOptimizations() const {
     return DoProfileGuidedOptimizations();
   }
 
-  bool HasInputVdexFile() const {
-    return input_vdex_file_ != nullptr || input_vdex_fd_ != -1 || !input_vdex_.empty();
-  }
-
   bool LoadProfile() {
     DCHECK(UseProfile());
 
@@ -2151,16 +2163,6 @@
     return dex_files_size >= very_large_threshold_;
   }
 
-  template <typename T>
-  static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
-    std::vector<T*> result;
-    result.reserve(src.size());
-    for (const std::unique_ptr<T>& t : src) {
-      result.push_back(t.get());
-    }
-    return result;
-  }
-
   std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
     // This function is used only for apps and for an app we have exactly one oat file.
     DCHECK(!IsBootImage());
@@ -2412,6 +2414,8 @@
     if (!IsBootImage()) {
       raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
     }
+    // Never allow implicit image compilation.
+    raw_options.push_back(std::make_pair("-Xnoimage-dex2oat", nullptr));
     // Disable libsigchain. We don't need it during compilation and it prevents us
     // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
     raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
@@ -2429,8 +2433,8 @@
       // which uses an unstarted runtime.
       raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr));
 
-      // Also force the free-list implementation for large objects.
-      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr));
+      // The default LOS implementation (map) is not deterministic, so disable it.
+      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=disabled", nullptr));
 
       // We also need to turn off the nonmoving space. For that, we need to disable HSpace
       // compaction (done above) and ensure that neither foreground nor background collectors
@@ -2693,6 +2697,7 @@
   int input_vdex_fd_;
   int output_vdex_fd_;
   std::string input_vdex_;
+  std::string output_vdex_;
   std::unique_ptr<VdexFile> input_vdex_file_;
   std::vector<const char*> dex_filenames_;
   std::vector<const char*> dex_locations_;
@@ -2899,13 +2904,6 @@
     }
   }
 
-  if (dex2oat->DoDexLayoutOptimizations()) {
-    if (dex2oat->HasInputVdexFile()) {
-      LOG(ERROR) << "Dexlayout is incompatible with an input VDEX";
-      return dex2oat::ReturnCode::kOther;
-    }
-  }
-
   art::MemMap::Init();  // For ZipEntry::ExtractToMemMap, and vdex.
 
   // Check early that the result of compilation can be written
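
The new --output-vdex plumbing follows the existing dex2oat option pattern: match a string option by prefix, take the value from the remainder, and reject mutually exclusive spellings up front. A stripped-down sketch of that pattern (the real code uses StringPiece::starts_with and a Usage() helper that exits):

    #include <string>

    struct VdexOptions {
      std::string output_vdex;
      int output_vdex_fd = -1;
    };

    bool ParseAndCheck(const std::string& option, VdexOptions* opts, std::string* error) {
      static const char kPrefix[] = "--output-vdex=";
      if (option.compare(0, sizeof(kPrefix) - 1, kPrefix) == 0) {
        opts->output_vdex = option.substr(sizeof(kPrefix) - 1);
      }
      // The file-name and file-descriptor spellings are mutually exclusive.
      if (opts->output_vdex_fd != -1 && !opts->output_vdex.empty()) {
        *error = "Can't have both --output-vdex-fd and --output-vdex";
        return false;
      }
      return true;
    }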
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 8c14b50..6420aa8 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -161,7 +161,7 @@
     runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(&argv);
 
     if (!runtime->IsVerificationEnabled()) {
-      argv.push_back("--compiler-filter=verify-none");
+      argv.push_back("--compiler-filter=assume-verified");
     }
 
     if (runtime->MustRelocateIfPossible()) {
@@ -430,6 +430,9 @@
 };
 
 TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  // Native memory usage isn't correctly tracked under sanitization.
+  TEST_DISABLED_FOR_MEMORY_TOOL_ASAN();
+
   // The `native_alloc_2_ >= native_alloc_1_` assertion below may not
   // hold true on some x86 systems; disable this test while we
   // investigate (b/29259363).
@@ -514,7 +517,7 @@
       }
 
       // If the input filter was "below," it should have been used.
-      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime, filter)) {
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kExtract, filter)) {
         EXPECT_EQ(odex_file->GetCompilerFilter(), filter);
       }
     } else {
@@ -536,11 +539,11 @@
   void CheckHostResult(bool expect_large) {
     if (!kIsTargetBuild) {
       if (expect_large) {
-        EXPECT_NE(output_.find("Very large app, downgrading to verify-at-runtime."),
+        EXPECT_NE(output_.find("Very large app, downgrading to extract."),
                   std::string::npos)
             << output_;
       } else {
-        EXPECT_EQ(output_.find("Very large app, downgrading to verify-at-runtime."),
+        EXPECT_EQ(output_.find("Very large app, downgrading to extract."),
                   std::string::npos)
             << output_;
       }
@@ -567,21 +570,21 @@
 };
 
 TEST_F(Dex2oatVeryLargeTest, DontUseVeryLarge) {
-  RunTest(CompilerFilter::kVerifyNone, false);
-  RunTest(CompilerFilter::kVerifyAtRuntime, false);
-  RunTest(CompilerFilter::kInterpretOnly, false);
+  RunTest(CompilerFilter::kAssumeVerified, false);
+  RunTest(CompilerFilter::kExtract, false);
+  RunTest(CompilerFilter::kQuicken, false);
   RunTest(CompilerFilter::kSpeed, false);
 
-  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=1000000" });
-  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=1000000" });
-  RunTest(CompilerFilter::kInterpretOnly, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kAssumeVerified, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kExtract, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kQuicken, false, { "--very-large-app-threshold=1000000" });
   RunTest(CompilerFilter::kSpeed, false, { "--very-large-app-threshold=1000000" });
 }
 
 TEST_F(Dex2oatVeryLargeTest, UseVeryLarge) {
-  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=100" });
-  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=100" });
-  RunTest(CompilerFilter::kInterpretOnly, true, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kAssumeVerified, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kExtract, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kQuicken, true, { "--very-large-app-threshold=100" });
   RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
 }
 
@@ -736,12 +739,12 @@
                          /* use_fd */ true,
                          /* num_profile_classes */ 1,
                          { input_vdex, output_vdex },
-                         /* expect_success */ false);
-      EXPECT_EQ(vdex_file2.GetFile()->GetLength(), 0u);
+                         /* expect_success */ true);
+      EXPECT_GT(vdex_file2.GetFile()->GetLength(), 0u);
     }
     ASSERT_EQ(vdex_file1->FlushCloseOrErase(), 0) << "Could not flush and close vdex file";
     CheckValidity();
-    ASSERT_FALSE(success_);
+    ASSERT_TRUE(success_);
   }
 
   void CheckResult(const std::string& dex_location,
@@ -884,6 +887,7 @@
 };
 
 TEST_F(Dex2oatReturnCodeTest, TestCreateRuntime) {
+  TEST_DISABLED_FOR_MEMORY_TOOL();  // b/19100793
   int status = RunTest({ "--boot-image=/this/does/not/exist/yolo.oat" });
   EXPECT_EQ(static_cast<int>(dex2oat::ReturnCode::kCreateRuntime), WEXITSTATUS(status)) << output_;
 }
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index 4b65c52..588a3ae 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -20,7 +20,7 @@
         "dexlayout.cc",
         "dex_ir.cc",
         "dex_ir_builder.cc",
-	"dex_verify.cc",
+        "dex_verify.cc",
         "dex_visualize.cc",
         "dex_writer.cc",
     ],
@@ -43,12 +43,14 @@
 
 art_cc_binary {
     name: "dexlayout",
+    defaults: ["art_defaults"],
     host_supported: true,
     srcs: ["dexlayout_main.cc"],
     cflags: ["-Wall"],
     shared_libs: [
         "libart",
         "libart-dexlayout",
+        "libbase",
     ],
 }
 
@@ -60,13 +62,28 @@
 
 art_cc_binary {
     name: "dexdiag",
-    host_supported: false,
+    defaults: ["art_defaults"],
+    host_supported: true,
     srcs: ["dexdiag.cc"],
     cflags: ["-Wall"],
     shared_libs: [
         "libart",
         "libart-dexlayout",
-        "libpagemap",
     ],
+    target: {
+        android: {
+            shared_libs: [
+                "libpagemap",
+            ],
+        },
+    },
 }
 
+art_cc_test {
+    name: "art_dexdiag_tests",
+    host_supported: true,
+    defaults: [
+        "art_gtest_defaults",
+    ],
+    srcs: ["dexdiag_test.cc"],
+}
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 6bd9da8..cf453b9 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -281,6 +281,16 @@
       item->SetDouble(conv.d);
       break;
     }
+    case DexFile::kDexAnnotationMethodType: {
+      const uint32_t proto_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item->SetProtoId(GetProtoId(proto_index));
+      break;
+    }
+    case DexFile::kDexAnnotationMethodHandle: {
+      const uint32_t method_handle_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item->SetMethodHandle(GetMethodHandle(method_handle_index));
+      break;
+    }
     case DexFile::kDexAnnotationString: {
       const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
       item->SetStringId(GetStringId(string_index));
@@ -451,8 +461,8 @@
   }
   uint8_t visibility = annotation->visibility_;
   const uint8_t* annotation_data = annotation->annotation_;
-  EncodedValue* encoded_value =
-      ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0);
+  std::unique_ptr<EncodedValue> encoded_value(
+      ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0));
   // TODO: Calculate the size of the annotation.
   AnnotationItem* annotation_item =
       new AnnotationItem(visibility, encoded_value->ReleaseEncodedAnnotation());
@@ -708,10 +718,12 @@
   MethodId* method_item = GetMethodId(cdii.GetMemberIndex());
   uint32_t access_flags = cdii.GetRawMemberAccessFlags();
   const DexFile::CodeItem* disk_code_item = cdii.GetMethodCodeItem();
-  CodeItem* code_item = nullptr;
+  CodeItem* code_item = code_items_.GetExistingObject(cdii.GetMethodCodeItemOffset());
   DebugInfoItem* debug_info = nullptr;
   if (disk_code_item != nullptr) {
-    code_item = CreateCodeItem(dex_file, *disk_code_item, cdii.GetMethodCodeItemOffset());
+    if (code_item == nullptr) {
+      code_item = CreateCodeItem(dex_file, *disk_code_item, cdii.GetMethodCodeItemOffset());
+    }
     debug_info = code_item->DebugInfo();
   }
   if (debug_info != nullptr) {
@@ -764,6 +776,64 @@
   return class_data;
 }
 
+void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) {
+  // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems.
+  const DexFile::MapList* map =
+      reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset());
+  for (uint32_t i = 0; i < map->size_; ++i) {
+    const DexFile::MapItem* item = map->list_ + i;
+    switch (item->type_) {
+      case DexFile::kDexTypeCallSiteIdItem:
+        SetCallSiteIdsOffset(item->offset_);
+        break;
+      case DexFile::kDexTypeMethodHandleItem:
+        SetMethodHandleItemsOffset(item->offset_);
+        break;
+      default:
+        break;
+    }
+  }
+  // Populate MethodHandleItems first (CallSiteIds may depend on them).
+  for (uint32_t i = 0; i < dex_file.NumMethodHandles(); i++) {
+    CreateMethodHandleItem(dex_file, i);
+  }
+  // Populate CallSiteIds.
+  for (uint32_t i = 0; i < dex_file.NumCallSiteIds(); i++) {
+    CreateCallSiteId(dex_file, i);
+  }
+}
+
+void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) {
+  const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i);
+  const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_;
+  EncodedArrayItem* call_site_item =
+      CreateEncodedArrayItem(disk_call_item_ptr, disk_call_site_id.data_off_);
+
+  CallSiteId* call_site_id = new CallSiteId(call_site_item);
+  call_site_ids_.AddIndexedItem(call_site_id, CallSiteIdsOffset() + i * CallSiteId::ItemSize(), i);
+}
+
+void Collections::CreateMethodHandleItem(const DexFile& dex_file, uint32_t i) {
+  const DexFile::MethodHandleItem& disk_method_handle = dex_file.GetMethodHandle(i);
+  uint16_t index = disk_method_handle.field_or_method_idx_;
+  DexFile::MethodHandleType type =
+      static_cast<DexFile::MethodHandleType>(disk_method_handle.method_handle_type_);
+  bool is_invoke = type == DexFile::MethodHandleType::kInvokeStatic ||
+                   type == DexFile::MethodHandleType::kInvokeInstance ||
+                   type == DexFile::MethodHandleType::kInvokeConstructor;
+  static_assert(DexFile::MethodHandleType::kLast == DexFile::MethodHandleType::kInvokeConstructor,
+                "Unexpected method handle types.");
+  IndexedItem* field_or_method_id;
+  if (is_invoke) {
+    field_or_method_id = GetMethodId(index);
+  } else {
+    field_or_method_id = GetFieldId(index);
+  }
+  MethodHandleItem* method_handle = new MethodHandleItem(type, field_or_method_id);
+  method_handle_items_.AddIndexedItem(
+      method_handle, MethodHandleItemsOffset() + i * MethodHandleItem::ItemSize(), i);
+}
+
 static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
   return 0;
 }
@@ -821,6 +891,16 @@
     &dex_ir::Collections::ClassDefsSize,
     &dex_ir::Collections::ClassDefsOffset
   }, {
+    "CallSiteId",
+    DexFile::kDexTypeCallSiteIdItem,
+    &dex_ir::Collections::CallSiteIdsSize,
+    &dex_ir::Collections::CallSiteIdsOffset
+  }, {
+    "MethodHandle",
+    DexFile::kDexTypeMethodHandleItem,
+    &dex_ir::Collections::MethodHandleItemsSize,
+    &dex_ir::Collections::MethodHandleItemsOffset
+  }, {
     "StringData",
     DexFile::kDexTypeStringDataItem,
     &dex_ir::Collections::StringDatasSize,
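
The unique_ptr change above (wrapping the ReadEncodedValue result) is a leak fix: the temporary EncodedValue was previously held by a raw pointer and never deleted after its inner annotation was released. The general shape of the fix, with hypothetical names:

    #include <memory>

    struct Payload { int data = 0; };

    struct Value {
      std::unique_ptr<Payload> payload = std::make_unique<Payload>();
      Payload* ReleasePayload() { return payload.release(); }
    };

    Value* ReadValue() { return new Value(); }  // Factory returning ownership.

    Payload* Extract() {
      // Owning the factory result ensures the temporary wrapper is destroyed
      // even though only its inner payload is kept alive.
      std::unique_ptr<Value> value(ReadValue());
      return value->ReleasePayload();
    }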
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index cad0395..5692eb2 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -35,6 +35,7 @@
 class AnnotationsDirectoryItem;
 class AnnotationSetItem;
 class AnnotationSetRefList;
+class CallSiteId;
 class ClassData;
 class ClassDef;
 class CodeItem;
@@ -47,6 +48,7 @@
 class Header;
 class MapList;
 class MapItem;
+class MethodHandleItem;
 class MethodId;
 class MethodItem;
 class ParameterAnnotation;
@@ -65,6 +67,8 @@
 static constexpr size_t kFieldIdItemSize = 8;
 static constexpr size_t kMethodIdItemSize = 8;
 static constexpr size_t kClassDefItemSize = 32;
+static constexpr size_t kCallSiteIdItemSize = 4;
+static constexpr size_t kMethodHandleItemSize = 8;
 
 // Visitor support
 class AbstractDispatcher {
@@ -79,6 +83,8 @@
   virtual void Dispatch(const ProtoId* proto_id) = 0;
   virtual void Dispatch(const FieldId* field_id) = 0;
   virtual void Dispatch(const MethodId* method_id) = 0;
+  virtual void Dispatch(const CallSiteId* call_site_id) = 0;
+  virtual void Dispatch(const MethodHandleItem* method_handle_item) = 0;
   virtual void Dispatch(ClassData* class_data) = 0;
   virtual void Dispatch(ClassDef* class_def) = 0;
   virtual void Dispatch(FieldItem* field_item) = 0;
@@ -165,6 +171,9 @@
   std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); }
   std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); }
   std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); }
+  std::vector<std::unique_ptr<CallSiteId>>& CallSiteIds() { return call_site_ids_.Collection(); }
+  std::vector<std::unique_ptr<MethodHandleItem>>& MethodHandleItems()
+      { return method_handle_items_.Collection(); }
   std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas()
       { return string_datas_.Collection(); }
   std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); }
@@ -189,6 +198,10 @@
   void CreateFieldId(const DexFile& dex_file, uint32_t i);
   void CreateMethodId(const DexFile& dex_file, uint32_t i);
   void CreateClassDef(const DexFile& dex_file, uint32_t i);
+  void CreateCallSiteId(const DexFile& dex_file, uint32_t i);
+  void CreateMethodHandleItem(const DexFile& dex_file, uint32_t i);
+
+  void CreateCallSitesAndMethodHandles(const DexFile& dex_file);
 
   TypeList* CreateTypeList(const DexFile::TypeList* type_list, uint32_t offset);
   EncodedArrayItem* CreateEncodedArrayItem(const uint8_t* static_data, uint32_t offset);
@@ -207,6 +220,8 @@
   FieldId* GetFieldId(uint32_t index) { return FieldIds()[index].get(); }
   MethodId* GetMethodId(uint32_t index) { return MethodIds()[index].get(); }
   ClassDef* GetClassDef(uint32_t index) { return ClassDefs()[index].get(); }
+  CallSiteId* GetCallSiteId(uint32_t index) { return CallSiteIds()[index].get(); }
+  MethodHandleItem* GetMethodHandle(uint32_t index) { return MethodHandleItems()[index].get(); }
 
   StringId* GetStringIdOrNullPtr(uint32_t index) {
     return index == DexFile::kDexNoIndex ? nullptr : GetStringId(index);
@@ -221,6 +236,8 @@
   uint32_t FieldIdsOffset() const { return field_ids_.GetOffset(); }
   uint32_t MethodIdsOffset() const { return method_ids_.GetOffset(); }
   uint32_t ClassDefsOffset() const { return class_defs_.GetOffset(); }
+  uint32_t CallSiteIdsOffset() const { return call_site_ids_.GetOffset(); }
+  uint32_t MethodHandleItemsOffset() const { return method_handle_items_.GetOffset(); }
   uint32_t StringDatasOffset() const { return string_datas_.GetOffset(); }
   uint32_t TypeListsOffset() const { return type_lists_.GetOffset(); }
   uint32_t EncodedArrayItemsOffset() const { return encoded_array_items_.GetOffset(); }
@@ -240,6 +257,9 @@
   void SetFieldIdsOffset(uint32_t new_offset) { field_ids_.SetOffset(new_offset); }
   void SetMethodIdsOffset(uint32_t new_offset) { method_ids_.SetOffset(new_offset); }
   void SetClassDefsOffset(uint32_t new_offset) { class_defs_.SetOffset(new_offset); }
+  void SetCallSiteIdsOffset(uint32_t new_offset) { call_site_ids_.SetOffset(new_offset); }
+  void SetMethodHandleItemsOffset(uint32_t new_offset)
+      { method_handle_items_.SetOffset(new_offset); }
   void SetStringDatasOffset(uint32_t new_offset) { string_datas_.SetOffset(new_offset); }
   void SetTypeListsOffset(uint32_t new_offset) { type_lists_.SetOffset(new_offset); }
   void SetEncodedArrayItemsOffset(uint32_t new_offset)
@@ -262,6 +282,8 @@
   uint32_t FieldIdsSize() const { return field_ids_.Size(); }
   uint32_t MethodIdsSize() const { return method_ids_.Size(); }
   uint32_t ClassDefsSize() const { return class_defs_.Size(); }
+  uint32_t CallSiteIdsSize() const { return call_site_ids_.Size(); }
+  uint32_t MethodHandleItemsSize() const { return method_handle_items_.Size(); }
   uint32_t StringDatasSize() const { return string_datas_.Size(); }
   uint32_t TypeListsSize() const { return type_lists_.Size(); }
   uint32_t EncodedArrayItemsSize() const { return encoded_array_items_.Size(); }
@@ -288,6 +310,8 @@
   CollectionVector<FieldId> field_ids_;
   CollectionVector<MethodId> method_ids_;
   CollectionVector<ClassDef> class_defs_;
+  CollectionVector<CallSiteId> call_site_ids_;
+  CollectionVector<MethodHandleItem> method_handle_items_;
 
   CollectionMap<StringData> string_datas_;
   CollectionMap<TypeList> type_lists_;
@@ -603,8 +627,10 @@
   void SetDouble(double d) { u_.double_val_ = d; }
   void SetStringId(StringId* string_id) { u_.string_val_ = string_id; }
   void SetTypeId(TypeId* type_id) { u_.type_val_ = type_id; }
+  void SetProtoId(ProtoId* proto_id) { u_.proto_val_ = proto_id; }
   void SetFieldId(FieldId* field_id) { u_.field_val_ = field_id; }
   void SetMethodId(MethodId* method_id) { u_.method_val_ = method_id; }
+  void SetMethodHandle(MethodHandleItem* method_handle) { u_.method_handle_val_ = method_handle; }
   void SetEncodedArray(EncodedArrayItem* encoded_array) { encoded_array_.reset(encoded_array); }
   void SetEncodedAnnotation(EncodedAnnotation* encoded_annotation)
       { encoded_annotation_.reset(encoded_annotation); }
@@ -619,8 +645,10 @@
   double GetDouble() const { return u_.double_val_; }
   StringId* GetStringId() const { return u_.string_val_; }
   TypeId* GetTypeId() const { return u_.type_val_; }
+  ProtoId* GetProtoId() const { return u_.proto_val_; }
   FieldId* GetFieldId() const { return u_.field_val_; }
   MethodId* GetMethodId() const { return u_.method_val_; }
+  MethodHandleItem* GetMethodHandle() const { return u_.method_handle_val_; }
   EncodedArrayItem* GetEncodedArray() const { return encoded_array_.get(); }
   EncodedAnnotation* GetEncodedAnnotation() const { return encoded_annotation_.get(); }
 
@@ -639,8 +667,10 @@
     double double_val_;
     StringId* string_val_;
     TypeId* type_val_;
+    ProtoId* proto_val_;
     FieldId* field_val_;
     MethodId* method_val_;
+    MethodHandleItem* method_handle_val_;
   } u_;
   std::unique_ptr<EncodedArrayItem> encoded_array_;
   std::unique_ptr<EncodedAnnotation> encoded_annotation_;
@@ -1087,6 +1117,48 @@
   DISALLOW_COPY_AND_ASSIGN(AnnotationsDirectoryItem);
 };
 
+class CallSiteId : public IndexedItem {
+ public:
+  explicit CallSiteId(EncodedArrayItem* call_site_item) : call_site_item_(call_site_item) {
+    size_ = kCallSiteIdItemSize;
+  }
+  ~CallSiteId() OVERRIDE { }
+
+  static size_t ItemSize() { return kCallSiteIdItemSize; }
+
+  EncodedArrayItem* CallSiteItem() const { return call_site_item_; }
+
+  void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); }
+
+ private:
+  EncodedArrayItem* call_site_item_;
+
+  DISALLOW_COPY_AND_ASSIGN(CallSiteId);
+};
+
+class MethodHandleItem : public IndexedItem {
+ public:
+  MethodHandleItem(DexFile::MethodHandleType method_handle_type, IndexedItem* field_or_method_id)
+      : method_handle_type_(method_handle_type),
+        field_or_method_id_(field_or_method_id) {
+    size_ = kMethodHandleItemSize;
+  }
+  ~MethodHandleItem() OVERRIDE { }
+
+  static size_t ItemSize() { return kMethodHandleItemSize; }
+
+  DexFile::MethodHandleType GetMethodHandleType() const { return method_handle_type_; }
+  IndexedItem* GetFieldOrMethodId() const { return field_or_method_id_; }
+
+  void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); }
+
+ private:
+  DexFile::MethodHandleType method_handle_type_;
+  IndexedItem* field_or_method_id_;
+
+  DISALLOW_COPY_AND_ASSIGN(MethodHandleItem);
+};
+
 // TODO(sehr): implement MapList.
 class MapList : public Item {
  public:
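
CallSiteId and MethodHandleItem slot into the IR's existing double dispatch: each node's Accept forwards its concrete type to the matching Dispatch overload on AbstractDispatcher. A toy version of the pattern (the T-suffixed names are illustrative, not the real classes):

    struct CallSiteIdT;
    struct MethodHandleItemT;

    struct Dispatcher {
      virtual ~Dispatcher() {}
      virtual void Dispatch(const CallSiteIdT* id) = 0;
      virtual void Dispatch(const MethodHandleItemT* item) = 0;
    };

    struct CallSiteIdT {
      void Accept(Dispatcher* d) const { d->Dispatch(this); }
    };
    struct MethodHandleItemT {
      void Accept(Dispatcher* d) const { d->Dispatch(this); }
    };

    // Example visitor: counts nodes of each kind.
    struct Counter : Dispatcher {
      int call_sites = 0;
      int method_handles = 0;
      void Dispatch(const CallSiteIdT*) override { ++call_sites; }
      void Dispatch(const MethodHandleItemT*) override { ++method_handles; }
    };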
diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc
index d0c5bf9..8eb726a 100644
--- a/dexlayout/dex_ir_builder.cc
+++ b/dexlayout/dex_ir_builder.cc
@@ -72,6 +72,8 @@
   }
   // MapItem.
   collections.SetMapListOffset(disk_header.map_off_);
+  // CallSiteIds and MethodHandleItems.
+  collections.CreateCallSitesAndMethodHandles(dex_file);
 
   CheckAndSetRemainingOffsets(dex_file, &collections);
 
@@ -115,6 +117,14 @@
         CHECK_EQ(item->size_, collections->ClassDefsSize());
         CHECK_EQ(item->offset_, collections->ClassDefsOffset());
         break;
+      case DexFile::kDexTypeCallSiteIdItem:
+        CHECK_EQ(item->size_, collections->CallSiteIdsSize());
+        CHECK_EQ(item->offset_, collections->CallSiteIdsOffset());
+        break;
+      case DexFile::kDexTypeMethodHandleItem:
+        CHECK_EQ(item->size_, collections->MethodHandleItemsSize());
+        CHECK_EQ(item->offset_, collections->MethodHandleItemsOffset());
+        break;
       case DexFile::kDexTypeMapList:
         CHECK_EQ(item->size_, 1u);
         CHECK_EQ(item->offset_, disk_header.map_off_);
diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc
index 7ffa38b..e1b828c 100644
--- a/dexlayout/dex_writer.cc
+++ b/dexlayout/dex_writer.cc
@@ -151,6 +151,12 @@
       length = EncodeDoubleValue(encoded_value->GetDouble(), buffer);
       start = 8 - length;
       break;
+    case DexFile::kDexAnnotationMethodType:
+      length = EncodeUIntValue(encoded_value->GetProtoId()->GetIndex(), buffer);
+      break;
+    case DexFile::kDexAnnotationMethodHandle:
+      length = EncodeUIntValue(encoded_value->GetMethodHandle()->GetIndex(), buffer);
+      break;
     case DexFile::kDexAnnotationString:
       length = EncodeUIntValue(encoded_value->GetStringId()->GetIndex(), buffer);
       break;
@@ -485,6 +491,27 @@
   }
 }
 
+void DexWriter::WriteCallSites() {
+  uint32_t call_site_off[1];
+  for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id :
+      header_->GetCollections().CallSiteIds()) {
+    call_site_off[0] = call_site_id->CallSiteItem()->GetOffset();
+    Write(call_site_off, call_site_id->GetSize(), call_site_id->GetOffset());
+  }
+}
+
+void DexWriter::WriteMethodHandles() {
+  uint16_t method_handle_buff[4];
+  for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle :
+      header_->GetCollections().MethodHandleItems()) {
+    method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType());
+    method_handle_buff[1] = 0;  // unused.
+    method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex();
+    method_handle_buff[3] = 0;  // unused.
+    Write(method_handle_buff, method_handle->GetSize(), method_handle->GetOffset());
+  }
+}
+
 struct MapItemContainer {
   MapItemContainer(uint32_t type, uint32_t size, uint32_t offset)
       : type_(type), size_(size), offset_(offset) { }
@@ -528,6 +555,14 @@
     queue.push(MapItemContainer(DexFile::kDexTypeClassDefItem, collection.ClassDefsSize(),
         collection.ClassDefsOffset()));
   }
+  if (collection.CallSiteIdsSize() != 0) {
+    queue.push(MapItemContainer(DexFile::kDexTypeCallSiteIdItem, collection.CallSiteIdsSize(),
+        collection.CallSiteIdsOffset()));
+  }
+  if (collection.MethodHandleItemsSize() != 0) {
+    queue.push(MapItemContainer(DexFile::kDexTypeMethodHandleItem,
+        collection.MethodHandleItemsSize(), collection.MethodHandleItemsOffset()));
+  }
 
   // Data section.
   queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset()));
@@ -618,10 +653,8 @@
   uint32_t class_defs_off = collections.ClassDefsOffset();
   buffer[16] = class_defs_size;
   buffer[17] = class_defs_off;
-  uint32_t data_off = class_defs_off + class_defs_size * dex_ir::ClassDef::ItemSize();
-  uint32_t data_size = file_size - data_off;
-  buffer[18] = data_size;
-  buffer[19] = data_off;
+  buffer[18] = header_->DataSize();
+  buffer[19] = header_->DataOffset();
   Write(buffer, 20 * sizeof(uint32_t), offset);
 }
 
@@ -640,6 +673,8 @@
   WriteDebugInfoItems();
   WriteCodeItems();
   WriteClasses();
+  WriteCallSites();
+  WriteMethodHandles();
   WriteMapItem();
   WriteHeader();
 }
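
WriteMethodHandles above spells out the on-disk method_handle_item record: four 16-bit fields, two of them unused padding, for 8 bytes total, matching kMethodHandleItemSize. Expressed as a struct for reference:

    #include <cstdint>

    // On-disk method_handle_item as emitted by WriteMethodHandles above.
    struct MethodHandleItemDisk {
      uint16_t method_handle_type;
      uint16_t unused1;
      uint16_t field_or_method_idx;
      uint16_t unused2;
    };
    static_assert(sizeof(MethodHandleItemDisk) == 8,
                  "Must match kMethodHandleItemSize.");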
diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h
index fb76e5c..b396adf 100644
--- a/dexlayout/dex_writer.h
+++ b/dexlayout/dex_writer.h
@@ -59,6 +59,8 @@
   void WriteDebugInfoItems();
   void WriteCodeItems();
   void WriteClasses();
+  void WriteCallSites();
+  void WriteMethodHandles();
   void WriteMapItem();
   void WriteHeader();
 
diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc
index 688201b..c577b6e 100644
--- a/dexlayout/dexdiag.cc
+++ b/dexlayout/dexdiag.cc
@@ -15,6 +15,7 @@
  */
 
 #include <errno.h>
+#include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -30,7 +31,9 @@
 #include "dex_file.h"
 #include "dex_ir.h"
 #include "dex_ir_builder.h"
+#ifdef ART_TARGET_ANDROID
 #include "pagemap/pagemap.h"
+#endif
 #include "runtime.h"
 #include "vdex_file.h"
 
@@ -38,8 +41,6 @@
 
 using android::base::StringPrintf;
 
-static constexpr size_t kLineLength = 32;
-
 static bool g_verbose = false;
 
 // The width needed to print a file page offset (32-bit).
@@ -164,6 +165,7 @@
   std::cout << ". (Mapped page not resident)" << std::endl;
 }
 
+#ifdef ART_TARGET_ANDROID
 static char PageTypeChar(uint16_t type) {
   if (kDexSectionInfoMap.find(type) == kDexSectionInfoMap.end()) {
     return '-';
@@ -194,6 +196,7 @@
                            size_t end,
                            const std::vector<dex_ir::DexFileSection>& sections,
                            PageCount* page_counts) {
+  static constexpr size_t kLineLength = 32;
   for (size_t page = start; page < end; ++page) {
     char type_char = '.';
     if (PM_PAGEMAP_PRESENT(pagemap[page])) {
@@ -268,7 +271,7 @@
     std::cerr << "Dex file start offset for "
               << dex_file->GetLocation().c_str()
               << " is incorrect: map start "
-              << StringPrintf("%zx > dex start %zx\n", map_start, dex_file_start)
+              << StringPrintf("%" PRIx64 " > dex start %" PRIx64 "\n", map_start, dex_file_start)
               << std::endl;
     return;
   }
@@ -277,7 +280,7 @@
   uint64_t end_page = RoundUp(start_address + dex_file_size, kPageSize) / kPageSize;
   std::cout << "DEX "
             << dex_file->GetLocation().c_str()
-            << StringPrintf(": %zx-%zx",
+            << StringPrintf(": %" PRIx64 "-%" PRIx64,
                             map_start + start_page * kPageSize,
                             map_start + end_page * kPageSize)
             << std::endl;
@@ -293,26 +296,26 @@
   DisplayDexStatistics(start_page, end_page, section_resident_pages, sections, printer);
 }
 
-static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) {
+static bool IsVdexFileMapping(const std::string& mapped_name) {
   // Confirm that the map is from a vdex file.
   static const char* suffixes[] = { ".vdex" };
-  std::string vdex_name;
-  bool found = false;
-  for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) {
-    if (strstr(pm_map_name(map), suffixes[j]) != nullptr) {
-      vdex_name = pm_map_name(map);
-      found = true;
-      break;
+  for (const char* suffix : suffixes) {
+    size_t match_loc = mapped_name.find(suffix);
+    if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) {
+      return true;
     }
   }
-  if (!found) {
-    return true;
-  }
+  return false;
+}
+
+static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) {
+  std::string vdex_name = pm_map_name(map);
   // Extract all the dex files from the vdex file.
   std::string error_msg;
   std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name,
                                                 false /*writeable*/,
                                                 false /*low_4gb*/,
+                                                false /*unquicken*/,
                                                 &error_msg /*out*/));
   if (vdex == nullptr) {
     std::cerr << "Could not open vdex file "
@@ -330,6 +333,7 @@
               << ": error "
               << error_msg
               << std::endl;
+    return false;
   }
   // Open the page mapping (one uint64_t per page) for the entire vdex mapping.
   uint64_t* pagemap;
@@ -341,7 +345,7 @@
   // Process the dex files.
   std::cout << "MAPPING "
             << pm_map_name(map)
-            << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map))
+            << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map))
             << std::endl;
   for (const auto& dex_file : dex_files) {
     ProcessOneDexMapping(pagemap,
@@ -355,6 +359,7 @@
 }
 
 static void ProcessOneOatMapping(uint64_t* pagemap, size_t size, Printer* printer) {
+  static constexpr size_t kLineLength = 32;
   size_t resident_page_count = 0;
   for (size_t page = 0; page < size; ++page) {
     char type_char = '.';
@@ -380,21 +385,19 @@
   printer->PrintSkipLine();
 }
 
-static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) {
-  // Confirm that the map is from a vdex file.
+static bool IsOatFileMapping(const std::string& mapped_name) {
+  // Confirm that the map is from an oat file.
   static const char* suffixes[] = { ".odex", ".oat" };
-  std::string vdex_name;
-  bool found = false;
-  for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) {
-    if (strstr(pm_map_name(map), suffixes[j]) != nullptr) {
-      vdex_name = pm_map_name(map);
-      found = true;
-      break;
+  for (const char* suffix : suffixes) {
+    size_t match_loc = mapped_name.find(suffix);
+    if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) {
+      return true;
     }
   }
-  if (!found) {
-    return true;
-  }
+  return false;
+}
+
+static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) {
   // Open the page mapping (one uint64_t per page) for the entire oat mapping.
   uint64_t* pagemap;
   size_t len;
@@ -405,7 +408,7 @@
   // Process the oat file mapping.
   std::cout << "MAPPING "
             << pm_map_name(map)
-            << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map))
+            << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map))
             << std::endl;
   ProcessOneOatMapping(pagemap, len, printer);
   free(pagemap);
@@ -425,9 +428,10 @@
   }
   return false;
 }
+#endif
 
 static void Usage(const char* cmd) {
-  std::cerr << "Usage: " << cmd << " [options] pid" << std::endl
+  std::cout << "Usage: " << cmd << " [options] pid" << std::endl
             << "    --contains=<string>:  Display sections containing string." << std::endl
             << "    --help:               Shows this message." << std::endl
             << "    --verbose:            Makes displays verbose." << std::endl;
@@ -462,6 +466,7 @@
   InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
+#ifdef ART_TARGET_ANDROID
   pid_t pid;
   char* endptr;
   pid = (pid_t)strtol(argv[argc - 1], &endptr, 10);
@@ -495,7 +500,8 @@
     return EXIT_FAILURE;
   }
 
-  // Process the mappings that are due to DEX files.
+  bool match_found = false;
+  // Process the mappings that are due to vdex or oat files.
   Printer printer;
   for (size_t i = 0; i < num_maps; ++i) {
     std::string mapped_file_name = pm_map_name(maps[i]);
@@ -503,12 +509,23 @@
     if (!FilterByNameContains(mapped_file_name, name_filters)) {
       continue;
     }
-    if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) {
-      return EXIT_FAILURE;
-    } else if (!DisplayMappingIfFromOatFile(maps[i], &printer)) {
-      return EXIT_FAILURE;
+    if (IsVdexFileMapping(mapped_file_name)) {
+      if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) {
+        return EXIT_FAILURE;
+      }
+      match_found = true;
+    } else if (IsOatFileMapping(mapped_file_name)) {
+      if (!DisplayMappingIfFromOatFile(maps[i], &printer)) {
+        return EXIT_FAILURE;
+      }
+      match_found = true;
     }
   }
+  if (!match_found) {
+    std::cerr << "No relevant memory maps were found." << std::endl;
+    return EXIT_FAILURE;
+  }
+#endif
 
   return EXIT_SUCCESS;
 }
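
The IsVdexFileMapping/IsOatFileMapping helpers above replace the old strstr() check, which matched the suffix anywhere in the path, with a true end-of-string match. The same check written directly as a suffix test, for reference:

    #include <cstring>
    #include <string>

    // True suffix match, equivalent to the find()-based check above.
    bool EndsWith(const std::string& name, const char* suffix) {
      const std::size_t n = std::strlen(suffix);
      return name.size() >= n && name.compare(name.size() - n, n, suffix) == 0;
    }

    bool IsVdexName(const std::string& name) { return EndsWith(name, ".vdex"); }
    bool IsOatName(const std::string& name) {
      return EndsWith(name, ".odex") || EndsWith(name, ".oat");
    }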
diff --git a/dexlayout/dexdiag_test.cc b/dexlayout/dexdiag_test.cc
new file mode 100644
index 0000000..a0b3f32
--- /dev/null
+++ b/dexlayout/dexdiag_test.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+#include <vector>
+
+#include "common_runtime_test.h"
+
+#include "runtime/exec_utils.h"
+#include "runtime/oat_file.h"
+#include "runtime/os.h"
+
+namespace art {
+
+static const char* kDexDiagContains = "--contains=core.vdex";
+static const char* kDexDiagContainsFails = "--contains=anything_other_than_core.vdex";
+static const char* kDexDiagHelp = "--help";
+static const char* kDexDiagVerbose = "--verbose";
+static const char* kDexDiagBinaryName = "dexdiag";
+
+class DexDiagTest : public CommonRuntimeTest {
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+  }
+
+  // Path to the dexdiag(d?)[32|64] binary.
+  std::string GetDexDiagFilePath() {
+    std::string root = GetTestAndroidRoot();
+
+    root += "/bin/";
+    root += kDexDiagBinaryName;
+
+    std::string root32 = root + "32";
+    // If we have both a 32-bit and a 64-bit build, the 32-bit file will have a 32 suffix.
+    if (OS::FileExists(root32.c_str()) && !Is64BitInstructionSet(kRuntimeISA)) {
+      return root32;
+    } else {
+      // This is a 64-bit build or only a single build exists.
+      return root;
+    }
+  }
+
+  std::unique_ptr<OatFile> OpenOatAndVdexFiles() {
+    // Open the core.oat file.
+    // This is a little convoluted because we have to
+    // get the location of the default core image (.../framework/core.oat),
+    // find it in the right architecture subdirectory (.../framework/arm/core.oat).
+    // Then, opening the oat file has the side-effect of opening the corresponding
+    // vdex file (.../framework/arm/core.vdex).
+    const std::string default_location = GetCoreOatLocation();
+    EXPECT_TRUE(!default_location.empty());
+    std::string oat_location = GetSystemImageFilename(default_location.c_str(), kRuntimeISA);
+    EXPECT_TRUE(!oat_location.empty());
+    std::cout << "==" << oat_location << std::endl;
+    std::string error_msg;
+    std::unique_ptr<OatFile> oat(OatFile::Open(oat_location.c_str(),
+                                               oat_location.c_str(),
+                                               nullptr,
+                                               nullptr,
+                                               false,
+                                               /*low_4gb*/false,
+                                               nullptr,
+                                               &error_msg));
+    EXPECT_TRUE(oat != nullptr) << error_msg;
+    return oat;
+  }
+
+  // Run dexdiag against the given process with the given arguments.
+  bool Exec(pid_t this_pid, const std::vector<std::string>& args, std::string* error_msg) {
+    // Invoke 'dexdiag' against the current process.
+    // This should succeed because we have a runtime and so dexdiag should
+    // be able to map in the boot.art and run its diagnostics on it.
+    std::vector<std::string> exec_argv;
+
+    // Build the command line "dexdiag <args> this_pid".
+    std::string executable_path = GetDexDiagFilePath();
+    EXPECT_TRUE(OS::FileExists(executable_path.c_str())) << executable_path
+                                                         << " should be a valid file path";
+    exec_argv.push_back(executable_path);
+    for (const auto& arg : args) {
+      exec_argv.push_back(arg);
+    }
+    exec_argv.push_back(std::to_string(this_pid));
+
+    return ::art::Exec(exec_argv, error_msg);
+  }
+};
+
+// We can't run these tests on the host, as they will fail when trying to open
+// /proc/pid/pagemap.
+// On the target, we invoke 'dexdiag' against the current process.
+// This should succeed because we have a runtime and so dexdiag should
+// be able to find the map for, e.g., boot.vdex and friends.
+TEST_F(DexDiagTest, DexDiagHelpTest) {
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagHelp }, &error_msg)) << "Failed to execute -- because: "
+                                                            << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagContainsTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagContainsTest) {
+#endif
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagContains }, &error_msg)) << "Failed to execute -- because: "
+                                                                << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagContainsFailsTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagContainsFailsTest) {
+#endif
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_FALSE(Exec(getpid(), { kDexDiagContainsFails }, &error_msg))
+      << "Failed to execute -- because: "
+      << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagVerboseTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagVerboseTest) {
+#endif
+  // TODO: test the resulting output.
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagVerbose }, &error_msg)) << "Failed to execute -- because: "
+                                                               << error_msg;
+}
+
+}  // namespace art
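GetDexDiagFilePath() above prefers the 32-suffixed binary when a 32-bit ISA runs against a multilib build. A minimal standalone C++ sketch of that rule, assuming a stat()-based FileExists as a stand-in for OS::FileExists and a bool for Is64BitInstructionSet(kRuntimeISA):

    #include <string>
    #include <sys/stat.h>

    // stat()-based stand-in for OS::FileExists.
    static bool FileExists(const std::string& path) {
      struct stat stat_buf;
      return stat(path.c_str(), &stat_buf) == 0;
    }

    // Prefer "<root>32" on a 32-bit ISA when the multilib build provides it;
    // otherwise fall back to the unsuffixed binary (64-bit or single build).
    std::string ResolveToolPath(const std::string& root, bool is_64bit_isa) {
      const std::string root32 = root + "32";
      return (!is_64bit_isa && FileExists(root32)) ? root32 : root;
    }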
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 204af22..9f7861f 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1638,7 +1638,8 @@
 // Orders code items according to specified class data ordering.
 // NOTE: If the section following the code items is byte aligned, the last code item is left in
 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
-int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order) {
+int32_t DexLayout::LayoutCodeItems(const DexFile* dex_file,
+                                   std::vector<dex_ir::ClassData*> new_class_data_order) {
   // Do not move code items if class data section precedes code item section.
   // ULEB encoding is variable length, causing problems determining the offset of the code items.
   // TODO: We should swap the order of these sections in the future to avoid this issue.
@@ -1649,44 +1650,93 @@
   }
 
   // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
+  dex_ir::CodeItem* last_code_item = nullptr;
   std::unordered_set<dex_ir::CodeItem*> visited_code_items;
   bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
   if (!is_code_item_aligned) {
-    dex_ir::CodeItem* last_code_item = nullptr;
     for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
       std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
-      if (last_code_item == nullptr || last_code_item->GetOffset() < code_item->GetOffset()) {
+      if (last_code_item == nullptr ||
+          last_code_item->GetOffset() < code_item->GetOffset()) {
         last_code_item = code_item.get();
       }
     }
-    // Preserve the last code item by marking it already visited.
-    visited_code_items.insert(last_code_item);
   }
 
-  int32_t diff = 0;
-  for (dex_ir::ClassData* class_data : new_class_data_order) {
-    class_data->SetOffset(class_data->GetOffset() + diff);
-    for (auto& method : *class_data->DirectMethods()) {
-      dex_ir::CodeItem* code_item = method->GetCodeItem();
-      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
-        visited_code_items.insert(code_item);
-        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
-        code_item->SetOffset(code_item_offset);
-        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+  enum CodeItemKind {
+    kMethodNotExecuted = 0,
+    kMethodExecuted = 1,
+    kSize = 2,
+  };
+
+  static constexpr InvokeType invoke_types[] = {
+      kDirect,
+      kVirtual
+  };
+
+  std::unordered_set<dex_ir::CodeItem*> code_items[CodeItemKind::kSize];
+  for (InvokeType invoke_type : invoke_types) {
+    for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
+      const bool is_profile_class =
+          info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
+
+      // Skip classes that are not defined in this dex file.
+      dex_ir::ClassData* class_data = class_def->GetClassData();
+      if (class_data == nullptr) {
+        continue;
       }
-    }
-    for (auto& method : *class_data->VirtualMethods()) {
-      dex_ir::CodeItem* code_item = method->GetCodeItem();
-      if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
-        visited_code_items.insert(code_item);
-        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
-        code_item->SetOffset(code_item_offset);
-        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+      for (auto& method : *(invoke_type == InvokeType::kDirect
+                                ? class_data->DirectMethods()
+                                : class_data->VirtualMethods())) {
+        const dex_ir::MethodId* method_id = method->GetMethodId();
+        dex_ir::CodeItem* code_item = method->GetCodeItem();
+        if (code_item == last_code_item || code_item == nullptr) {
+          continue;
+        }
+        // Separate executed methods (clinits and profiled methods) from unexecuted methods.
+        // TODO: clinits are executed only once, consider separating them further.
+        const bool is_clinit = is_profile_class &&
+            (method->GetAccessFlags() & kAccConstructor) != 0 &&
+            (method->GetAccessFlags() & kAccStatic) != 0;
+        const bool is_method_executed = is_clinit ||
+            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+        code_items[is_method_executed
+                       ? CodeItemKind::kMethodExecuted
+                       : CodeItemKind::kMethodNotExecuted]
+            .insert(code_item);
       }
     }
   }
+
+  // total_diff includes diffs generated by both executed and non-executed methods.
+  int32_t total_diff = 0;
+  // The relative placement of the two sets has no effect on correctness; a
+  // fixed iteration order is used to ensure the layout is deterministic.
+  for (std::unordered_set<dex_ir::CodeItem*>& code_items_set : code_items) {
+    // diff is tracked separately for the executed and the non-executed set.
+    int32_t diff = 0;
+    for (dex_ir::ClassData* data : new_class_data_order) {
+      data->SetOffset(data->GetOffset() + diff);
+      for (InvokeType invoke_type : invoke_types) {
+      for (auto& method : *(invoke_type == InvokeType::kDirect
+                                  ? data->DirectMethods()
+                                  : data->VirtualMethods())) {
+          dex_ir::CodeItem* code_item = method->GetCodeItem();
+          if (code_item != nullptr &&
+              code_items_set.find(code_item) != code_items_set.end()) {
+            diff += UnsignedLeb128Size(code_item_offset)
+                - UnsignedLeb128Size(code_item->GetOffset());
+            code_item->SetOffset(code_item_offset);
+            code_item_offset +=
+                RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+          }
+        }
+      }
+    }
+    total_diff += diff;
+  }
   // Adjust diff to be 4-byte aligned.
-  return RoundUp(diff, kDexCodeItemAlignment);
+  return RoundUp(total_diff, kDexCodeItemAlignment);
 }
 
 bool DexLayout::IsNextSectionCodeItemAligned(uint32_t offset) {
@@ -1795,7 +1845,7 @@
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
   LayoutStringData(dex_file);
   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
-  int32_t diff = LayoutCodeItems(new_class_data_order);
+  int32_t diff = LayoutCodeItems(dex_file, new_class_data_order);
   // Move sections after ClassData by diff bytes.
   FixupSections(header_->GetCollections().ClassDatasOffset(), diff);
   // Update file size.
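The diff bookkeeping in LayoutCodeItems above works because class data items reference their code items by ULEB128-encoded offset, so moving a code item changes the encoded size of each reference by the difference of the two ULEB128 lengths. A self-contained sketch, with UlebSize as a local stand-in for UnsignedLeb128Size:

    #include <cstdint>

    // Number of bytes needed to encode value as unsigned LEB128.
    static uint32_t UlebSize(uint32_t value) {
      uint32_t size = 1;
      while (value >= 0x80) {
        value >>= 7;
        ++size;
      }
      return size;
    }

    // Bytes the class data section grows (negative: shrinks) when one code
    // item moves from old_offset to new_offset -- the per-method term that
    // LayoutCodeItems accumulates into its running diff.
    int32_t OffsetEncodingDelta(uint32_t old_offset, uint32_t new_offset) {
      return static_cast<int32_t>(UlebSize(new_offset)) -
             static_cast<int32_t>(UlebSize(old_offset));
    }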
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index 69117ad..531bc98 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -108,7 +108,8 @@
   void DumpDexFile();
 
   std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
-  int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  int32_t LayoutCodeItems(const DexFile* dex_file,
+                          std::vector<dex_ir::ClassData*> new_class_data_order);
   void LayoutStringData(const DexFile* dex_file);
   bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 38faf96..3c627ea 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -170,14 +170,14 @@
   }
 
   // Open profile file.
-  ProfileCompilationInfo* profile_info = nullptr;
+  std::unique_ptr<ProfileCompilationInfo> profile_info;
   if (options.profile_file_name_) {
     int profile_fd = open(options.profile_file_name_, O_RDONLY);
     if (profile_fd < 0) {
       fprintf(stderr, "Can't open %s\n", options.profile_file_name_);
       return 1;
     }
-    profile_info = new ProfileCompilationInfo();
+    profile_info.reset(new ProfileCompilationInfo());
     if (!profile_info->Load(profile_fd)) {
       fprintf(stderr, "Can't read profile info from %s\n", options.profile_file_name_);
       return 1;
@@ -185,13 +185,19 @@
   }
 
   // Create DexLayout instance.
-  DexLayout dex_layout(options, profile_info, out_file);
+  DexLayout dex_layout(options, profile_info.get(), out_file);
 
   // Process all files supplied on command line.
   int result = 0;
   while (optind < argc) {
     result |= dex_layout.ProcessFile(argv[optind++]);
   }  // while
+
+  if (options.output_file_name_) {
+    CHECK(out_file != nullptr && out_file != stdout);
+    fclose(out_file);
+  }
+
   return result != 0;
 }
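The explicit fclose() added above covers the normal exit path; the early-error returns (unopenable or unreadable profile) still leave out_file open until process exit. A hypothetical alternative sketch, not what the patch does, using a unique_ptr custom deleter so every path closes the file:

    #include <cstdio>
    #include <memory>

    // Deleter that closes a FILE* unless it is null or stdout.
    struct FileCloser {
      void operator()(FILE* f) const {
        if (f != nullptr && f != stdout) {
          fclose(f);
        }
      }
    };
    using UniqueFile = std::unique_ptr<FILE, FileCloser>;

    // Usage sketch:
    //   UniqueFile out_file(fopen(options.output_file_name_, "wb"));
    //   if (out_file == nullptr) { return 1; }  // closed on all other paths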
 
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index e988aac..877ea92 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -205,6 +205,19 @@
     "AAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAAuAAAAAYAAAABAAAA0AAAAAEgAAACAAAA"
     "8AAAAAIgAAAIAAAAHAEAAAMgAAACAAAAVAEAAAAgAAABAAAAYwEAAAAQAAABAAAAdAEAAA==";
 
+// Dex file with multiple class data items pointing to the same code item.
+// Constructed by hex editing.
+static const char kDuplicateCodeItemInputDex[] =
+    "ZGV4CjAzNQCwKtVglQOmLWuHwldN5jkBOInC7mTMhJMAAgAAcAAAAHhWNBIAAAAAAAAAAHgBAAAH"
+    "AAAAcAAAAAMAAACMAAAAAQAAAJgAAAAAAAAAAAAAAAQAAACkAAAAAQAAAMQAAAAcAQAA5AAAACQB"
+    "AAAsAQAANAEAADkBAABNAQAAUAEAAFMBAAACAAAAAwAAAAQAAAAEAAAAAgAAAAAAAAAAAAAAAAAA"
+    "AAAAAAAFAAAAAAAAAAYAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAABAAAAAAAAAGUBAAAAAAAA"
+    "AQABAAEAAABWAQAABAAAAHAQAwAAAA4AAQABAAAAAABbAQAAAQAAAA4AAAABAAEAAAAAAGABAAAB"
+    "AAAADgAAAAY8aW5pdD4ABkEuamF2YQADTEE7ABJMamF2YS9sYW5nL09iamVjdDsAAVYAAWEAAWIA"
+    "AQAHDgADAAcOAAUABw4AAAABAgCBgATkAQEA/AEBAPwBAAsAAAAAAAAAAQAAAAAAAAABAAAABwAA"
+    "AHAAAAACAAAAAwAAAIwAAAADAAAAAQAAAJgAAAAFAAAABAAAAKQAAAAGAAAAAQAAAMQAAAABIAAA"
+    "AwAAAOQAAAACIAAABwAAACQBAAADIAAAAwAAAFYBAAAAIAAAAQAAAGUBAAAAEAAAAQAAAHgBAAA=";
+
 static void WriteBase64ToFile(const char* base64, File* file) {
   // Decode base64.
   CHECK(base64 != nullptr);
@@ -519,4 +532,17 @@
                             dexlayout_exec_argv));
 }
 
+TEST_F(DexLayoutTest, DuplicateCodeItem) {
+  ScratchFile temp_dex;
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kDuplicateCodeItemInputDex,
+                            nullptr /* profile_file */,
+                            nullptr /* profile_filename */,
+                            dexlayout_exec_argv));
+}
+
 }  // namespace art
diff --git a/dexoptanalyzer/dexoptanalyzer.cc b/dexoptanalyzer/dexoptanalyzer.cc
index 965e407..9a2eb7f 100644
--- a/dexoptanalyzer/dexoptanalyzer.cc
+++ b/dexoptanalyzer/dexoptanalyzer.cc
@@ -216,6 +216,8 @@
     if (!CreateRuntime()) {
       return kErrorCannotCreateRuntime;
     }
+    std::unique_ptr<Runtime> runtime(Runtime::Current());
+
     OatFileAssistant oat_file_assistant(dex_file_.c_str(), isa_, /*load_executable*/ false);
     // Always treat elements of the bootclasspath as up-to-date.
     // TODO(calin): this check should be in OatFileAssistant.
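Wrapping Runtime::Current() in a unique_ptr right after CreateRuntime() means every return path out of the analyzer tears the runtime down. The ownership pattern in miniature, with a demo type standing in for art::Runtime:

    #include <memory>

    // Demo stand-in for art::Runtime. Current() hands out the process-wide
    // instance; like the real runtime, it is single-shot.
    struct Service {
      static Service* Current() {
        static Service* instance = new Service();
        return instance;
      }
      ~Service() { /* orderly shutdown */ }
    };

    int Analyze() {
      std::unique_ptr<Service> service(Service::Current());  // adopt ownership
      // ... several early-return error paths ...
      return 0;
    }  // whichever path returns, the instance is destroyed here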
diff --git a/dexoptanalyzer/dexoptanalyzer_test.cc b/dexoptanalyzer/dexoptanalyzer_test.cc
index 57d3f1f..1703ff4 100644
--- a/dexoptanalyzer/dexoptanalyzer_test.cc
+++ b/dexoptanalyzer/dexoptanalyzer_test.cc
@@ -89,8 +89,8 @@
   Copy(GetDexSrc1(), dex_location);
 
   Verify(dex_location, CompilerFilter::kSpeed);
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
-  Verify(dex_location, CompilerFilter::kInterpretOnly);
+  Verify(dex_location, CompilerFilter::kExtract);
+  Verify(dex_location, CompilerFilter::kQuicken);
   Verify(dex_location, CompilerFilter::kSpeedProfile);
 }
 
@@ -101,8 +101,8 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   Verify(dex_location, CompilerFilter::kSpeed);
-  Verify(dex_location, CompilerFilter::kInterpretOnly);
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
+  Verify(dex_location, CompilerFilter::kQuicken);
+  Verify(dex_location, CompilerFilter::kExtract);
   Verify(dex_location, CompilerFilter::kEverything);
 }
 
@@ -113,9 +113,9 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
   Verify(dex_location, CompilerFilter::kSpeedProfile, false);
-  Verify(dex_location, CompilerFilter::kInterpretOnly, false);
+  Verify(dex_location, CompilerFilter::kQuicken, false);
   Verify(dex_location, CompilerFilter::kSpeedProfile, true);
-  Verify(dex_location, CompilerFilter::kInterpretOnly, true);
+  Verify(dex_location, CompilerFilter::kQuicken, true);
 }
 
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
@@ -154,7 +154,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
+  Verify(dex_location, CompilerFilter::kExtract);
   Verify(dex_location, CompilerFilter::kSpeed);
 }
 
@@ -170,8 +170,8 @@
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
-  Verify(dex_location, CompilerFilter::kInterpretOnly);
+  Verify(dex_location, CompilerFilter::kExtract);
+  Verify(dex_location, CompilerFilter::kQuicken);
   Verify(dex_location, CompilerFilter::kSpeed);
 }
 
@@ -184,13 +184,13 @@
 
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(),
-                     CompilerFilter::kVerifyAtRuntime,
+                     CompilerFilter::kExtract,
                      /*relocate*/true,
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
-  Verify(dex_location, CompilerFilter::kInterpretOnly);
+  Verify(dex_location, CompilerFilter::kExtract);
+  Verify(dex_location, CompilerFilter::kQuicken);
 }
 
 // Case: We have a DEX file and an ODEX file, but no OAT file.
@@ -201,7 +201,7 @@
   Copy(GetDexSrc1(), dex_location);
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
+  Verify(dex_location, CompilerFilter::kExtract);
   Verify(dex_location, CompilerFilter::kSpeed);
 }
 
@@ -235,7 +235,7 @@
   // Strip the dex file.
   Copy(GetStrippedDexSrc1(), dex_location);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
+  Verify(dex_location, CompilerFilter::kExtract);
   Verify(dex_location, CompilerFilter::kSpeed);
   Verify(dex_location, CompilerFilter::kEverything);
 }
@@ -248,8 +248,8 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   Verify(dex_location, CompilerFilter::kSpeed);
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
-  Verify(dex_location, CompilerFilter::kInterpretOnly);
+  Verify(dex_location, CompilerFilter::kExtract);
+  Verify(dex_location, CompilerFilter::kQuicken);
 }
 
 // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
@@ -287,9 +287,9 @@
   std::string odex_location = GetOdexDir() + "/DexVerifyAtRuntimeOdexNoOat.odex";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kExtract);
 
-  Verify(dex_location, CompilerFilter::kVerifyAtRuntime);
+  Verify(dex_location, CompilerFilter::kExtract);
   Verify(dex_location, CompilerFilter::kSpeed);
 }
 
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index eb57d33..3c60bf4 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -433,6 +433,11 @@
   { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x12, "div_u", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x12, "mod_s", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x12, "mod_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x10, "add_a", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0x10, "ave_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x10, "ave_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x10, "aver_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x10, "aver_u", "Vkmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" },
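The new MSA rows follow the table's mask/value scheme: an entry matches when (instruction & mask) == value, so the masked high bits pin the MSA major opcode while bits 23-25 select among add_a/ave_s/ave_u/aver_s/aver_u. A sketch of that matching, with illustrative types rather than the disassembler's own:

    #include <cstddef>
    #include <cstdint>

    struct OpEntry {
      uint32_t mask;   // which bits must match
      uint32_t value;  // the required bit pattern
      const char* name;
    };

    // First matching entry wins, so more specific masks must come first.
    const char* Decode(uint32_t instruction,
                       const OpEntry* table,
                       size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if ((instruction & table[i].mask) == table[i].value) {
          return table[i].name;
        }
      }
      return "unknown";
    }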
diff --git a/oatdump/Android.bp b/oatdump/Android.bp
index f1fcf3d..1cd97c2 100644
--- a/oatdump/Android.bp
+++ b/oatdump/Android.bp
@@ -114,5 +114,8 @@
     defaults: [
         "art_gtest_defaults",
     ],
-    srcs: ["oatdump_test.cc"],
+    srcs: [
+        "oatdump_test.cc",
+        "oatdump_image_test.cc",
+    ],
 }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 878d0f2..f07e0f9 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -125,9 +125,12 @@
     std::unique_ptr<const InstructionSetFeatures> features = InstructionSetFeatures::FromBitmap(
         isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap());
 
-    File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
+    std::unique_ptr<File> elf_file(OS::CreateEmptyFile(output_name_.c_str()));
+    if (elf_file == nullptr) {
+      return false;
+    }
     std::unique_ptr<BufferedOutputStream> output_stream(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file.get())));
     builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get()));
 
     builder_->Start();
@@ -182,7 +185,17 @@
 
     builder_->End();
 
-    return builder_->Good();
+    bool ret_value = builder_->Good();
+
+    builder_.reset();
+    output_stream.reset();
+
+    if (elf_file->FlushCloseOrErase() != 0) {
+      return false;
+    }
+    elf_file.reset();
+
+    return ret_value;
   }
 
   void Walk() {
@@ -2842,14 +2855,14 @@
 
 static int SymbolizeOat(const char* oat_filename, std::string& output_name, bool no_bits) {
   std::string error_msg;
-  OatFile* oat_file = OatFile::Open(oat_filename,
-                                    oat_filename,
-                                    nullptr,
-                                    nullptr,
-                                    false,
-                                    /*low_4gb*/false,
-                                    nullptr,
-                                    &error_msg);
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_filename,
+                                                  oat_filename,
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  /*low_4gb*/false,
+                                                  nullptr,
+                                                  &error_msg));
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
@@ -2859,10 +2872,10 @@
   // Try to produce an ELF file of the same type. This is finicky, as we have used 32-bit ELF
   // files for 64-bit code in the past.
   if (Is64BitInstructionSet(oat_file->GetOatHeader().GetInstructionSet())) {
-    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file, output_name, no_bits);
+    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file.get(), output_name, no_bits);
     result = oat_symbolizer.Symbolize();
   } else {
-    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file, output_name, no_bits);
+    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file.get(), output_name, no_bits);
     result = oat_symbolizer.Symbolize();
   }
   if (!result) {
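The teardown order in the hunk above matters: builder_ and output_stream buffer writes destined for elf_file, so both are reset before FlushCloseOrErase() flushes and closes the file. The dependency in miniature, with illustrative stand-ins:

    #include <memory>

    struct File { /* owns a descriptor; flushed and closed last */ };
    struct Stream {
      explicit Stream(File* file) : file_(file) {}
      ~Stream() { /* flush buffered bytes into *file_ */ }
      File* file_;  // non-owning: the File must outlive the Stream
    };

    void Shutdown(std::unique_ptr<Stream>& stream,
                  std::unique_ptr<File>& file) {
      stream.reset();  // destroy users of the file first
      file.reset();    // then flush/close the file itself
    }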
diff --git a/oatdump/oatdump_image_test.cc b/oatdump/oatdump_image_test.cc
new file mode 100644
index 0000000..e9cc922
--- /dev/null
+++ b/oatdump/oatdump_image_test.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oatdump_test.h"
+
+namespace art {
+
+// Disable tests on arm and mips as they are taking too long to run. b/27824283.
+#if !defined(__arm__) && !defined(__mips__)
+TEST_F(OatDumpTest, TestImage) {
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
+}
+TEST_F(OatDumpTest, TestImageStatic) {
+  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
+}
+
+TEST_F(OatDumpTest, TestOatImage) {
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
+}
+TEST_F(OatDumpTest, TestOatImageStatic) {
+  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
+}
+#endif
+}  // namespace art
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index c7c3ddd7..7260d74 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,235 +14,12 @@
  * limitations under the License.
  */
 
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "android-base/strings.h"
-
-#include "common_runtime_test.h"
-
-#include "base/unix_file/fd_file.h"
-#include "runtime/arch/instruction_set.h"
-#include "runtime/exec_utils.h"
-#include "runtime/gc/heap.h"
-#include "runtime/gc/space/image_space.h"
-#include "runtime/os.h"
-#include "runtime/utils.h"
-#include "utils.h"
-
-#include <sys/types.h>
-#include <unistd.h>
+#include "oatdump_test.h"
 
 namespace art {
 
-class OatDumpTest : public CommonRuntimeTest {
- protected:
-  virtual void SetUp() {
-    CommonRuntimeTest::SetUp();
-    core_art_location_ = GetCoreArtLocation();
-    core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA);
-  }
-
-  // Linking flavor.
-  enum Flavor {
-    kDynamic,  // oatdump(d)
-    kStatic,   // oatdump(d)s
-  };
-
-  // Returns path to the oatdump binary.
-  std::string GetOatDumpFilePath(Flavor flavor) {
-    std::string root = GetTestAndroidRoot();
-    root += "/bin/oatdump";
-    if (kIsDebugBuild) {
-      root += "d";
-    }
-    if (flavor == kStatic) {
-      root += "s";
-    }
-    return root;
-  }
-
-  enum Mode {
-    kModeOat,
-    kModeArt,
-    kModeSymbolize,
-  };
-
-  // Display style.
-  enum Display {
-    kListOnly,
-    kListAndCode
-  };
-
-  // Run the test with custom arguments.
-  bool Exec(Flavor flavor,
-            Mode mode,
-            const std::vector<std::string>& args,
-            Display display,
-            std::string* error_msg) {
-    std::string file_path = GetOatDumpFilePath(flavor);
-
-    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
-
-    // ScratchFile scratch;
-    std::vector<std::string> exec_argv = { file_path };
-    std::vector<std::string> expected_prefixes;
-    if (mode == kModeSymbolize) {
-      exec_argv.push_back("--symbolize=" + core_oat_location_);
-      exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else {
-      expected_prefixes.push_back("Dex file data for");
-      expected_prefixes.push_back("Num string ids:");
-      expected_prefixes.push_back("Num field ids:");
-      expected_prefixes.push_back("Num method ids:");
-      expected_prefixes.push_back("LOCATION:");
-      expected_prefixes.push_back("MAGIC:");
-      expected_prefixes.push_back("DEX FILE COUNT:");
-      if (display == kListAndCode) {
-        // Code and dex code do not show up if list only.
-        expected_prefixes.push_back("DEX CODE:");
-        expected_prefixes.push_back("CODE:");
-        expected_prefixes.push_back("CodeInfoEncoding");
-        expected_prefixes.push_back("CodeInfoInlineInfo");
-      }
-      if (mode == kModeArt) {
-        exec_argv.push_back("--image=" + core_art_location_);
-        exec_argv.push_back("--instruction-set=" + std::string(
-            GetInstructionSetString(kRuntimeISA)));
-        expected_prefixes.push_back("IMAGE LOCATION:");
-        expected_prefixes.push_back("IMAGE BEGIN:");
-        expected_prefixes.push_back("kDexCaches:");
-      } else {
-        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-        exec_argv.push_back("--oat-file=" + core_oat_location_);
-      }
-    }
-    exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-
-    bool result = true;
-    // We must set --android-root.
-    int link[2];
-    if (pipe(link) == -1) {
-      *error_msg = strerror(errno);
-      return false;
-    }
-
-    const pid_t pid = fork();
-    if (pid == -1) {
-      *error_msg = strerror(errno);
-      return false;
-    }
-
-    if (pid == 0) {
-      dup2(link[1], STDOUT_FILENO);
-      close(link[0]);
-      close(link[1]);
-      // change process groups, so we don't get reaped by ProcessManager
-      setpgid(0, 0);
-      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
-      std::vector<char*> argv;
-      for (size_t i = 0; i < exec_argv.size(); ++i) {
-        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
-      }
-      argv.push_back(nullptr);
-      UNUSED(execv(argv[0], &argv[0]));
-      const std::string command_line(android::base::Join(exec_argv, ' '));
-      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-      // _exit to avoid atexit handlers in child.
-      _exit(1);
-    } else {
-      close(link[1]);
-      static const size_t kLineMax = 256;
-      char line[kLineMax] = {};
-      size_t line_len = 0;
-      size_t total = 0;
-      std::vector<bool> found(expected_prefixes.size(), false);
-      while (true) {
-        while (true) {
-          size_t spaces = 0;
-          // Trim spaces at the start of the line.
-          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
-          if (spaces > 0) {
-            line_len -= spaces;
-            memmove(&line[0], &line[spaces], line_len);
-          }
-          ssize_t bytes_read =
-              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
-          if (bytes_read <= 0) {
-            break;
-          }
-          line_len += bytes_read;
-          total += bytes_read;
-        }
-        if (line_len == 0) {
-          break;
-        }
-        // Check contents.
-        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
-          const std::string& expected = expected_prefixes[i];
-          if (!found[i] &&
-              line_len >= expected.length() &&
-              memcmp(line, expected.c_str(), expected.length()) == 0) {
-            found[i] = true;
-          }
-        }
-        // Skip to next line.
-        size_t next_line = 0;
-        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
-        line_len -= next_line + 1;
-        memmove(&line[0], &line[next_line + 1], line_len);
-      }
-      if (mode == kModeSymbolize) {
-        EXPECT_EQ(total, 0u);
-      } else {
-        EXPECT_GT(total, 0u);
-      }
-      LOG(INFO) << "Processed bytes " << total;
-      close(link[0]);
-      int status = 0;
-      if (waitpid(pid, &status, 0) != -1) {
-        result = (status == 0);
-      }
-
-      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
-        if (!found[i]) {
-          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
-          result = false;
-        }
-      }
-    }
-
-    return result;
-  }
-
- private:
-  std::string core_art_location_;
-  std::string core_oat_location_;
-};
-
 // Disable tests on arm and mips as they are taking too long to run. b/27824283.
 #if !defined(__arm__) && !defined(__mips__)
-TEST_F(OatDumpTest, TestImage) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
-}
-TEST_F(OatDumpTest, TestImageStatic) {
-  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
-}
-
-TEST_F(OatDumpTest, TestOatImage) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
-}
-TEST_F(OatDumpTest, TestOatImageStatic) {
-  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
-}
-
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
   ASSERT_TRUE(Exec(kDynamic, kModeArt, {"--no-dump:vmap"}, kListAndCode, &error_msg)) << error_msg;
diff --git a/oatdump/oatdump_test.h b/oatdump/oatdump_test.h
new file mode 100644
index 0000000..48e9eb5
--- /dev/null
+++ b/oatdump/oatdump_test.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_OATDUMP_OATDUMP_TEST_H_
+#define ART_OATDUMP_OATDUMP_TEST_H_
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "android-base/strings.h"
+
+#include "common_runtime_test.h"
+
+#include "base/unix_file/fd_file.h"
+#include "runtime/arch/instruction_set.h"
+#include "runtime/exec_utils.h"
+#include "runtime/gc/heap.h"
+#include "runtime/gc/space/image_space.h"
+#include "runtime/os.h"
+#include "runtime/utils.h"
+#include "utils.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace art {
+
+class OatDumpTest : public CommonRuntimeTest {
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+    core_art_location_ = GetCoreArtLocation();
+    core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA);
+  }
+
+  // Linking flavor.
+  enum Flavor {
+    kDynamic,  // oatdump(d)
+    kStatic,   // oatdump(d)s
+  };
+
+  // Returns path to the oatdump binary.
+  std::string GetOatDumpFilePath(Flavor flavor) {
+    std::string root = GetTestAndroidRoot();
+    root += "/bin/oatdump";
+    if (kIsDebugBuild) {
+      root += "d";
+    }
+    if (flavor == kStatic) {
+      root += "s";
+    }
+    return root;
+  }
+
+  enum Mode {
+    kModeOat,
+    kModeArt,
+    kModeSymbolize,
+  };
+
+  // Display style.
+  enum Display {
+    kListOnly,
+    kListAndCode
+  };
+
+  // Run the test with custom arguments.
+  bool Exec(Flavor flavor,
+            Mode mode,
+            const std::vector<std::string>& args,
+            Display display,
+            std::string* error_msg) {
+    std::string file_path = GetOatDumpFilePath(flavor);
+
+    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+
+    // ScratchFile scratch;
+    std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
+    if (mode == kModeSymbolize) {
+      exec_argv.push_back("--symbolize=" + core_oat_location_);
+      exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
+    } else {
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (display == kListAndCode) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+        expected_prefixes.push_back("CodeInfoEncoding");
+        expected_prefixes.push_back("CodeInfoInlineInfo");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
+    }
+    exec_argv.insert(exec_argv.end(), args.begin(), args.end());
+
+    bool result = true;
+    // We must set --android-root.
+    int link[2];
+    if (pipe(link) == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      // change process groups, so we don't get reaped by ProcessManager
+      setpgid(0, 0);
+      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
+      std::vector<char*> argv;
+      for (size_t i = 0; i < exec_argv.size(); ++i) {
+        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
+      }
+      argv.push_back(nullptr);
+      UNUSED(execv(argv[0], &argv[0]));
+      const std::string command_line(android::base::Join(exec_argv, ' '));
+      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
+      // _exit to avoid atexit handlers in child.
+      _exit(1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
+  }
+
+ private:
+  std::string core_art_location_;
+  std::string core_oat_location_;
+};
+
+}  // namespace art
+
+#endif  // ART_OATDUMP_OATDUMP_TEST_H_
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index fbb0978..e750ede 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -30,6 +30,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/dumpable.h"
+#include "base/memory_tool.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
 #include "base/unix_file/fd_file.h"
@@ -142,6 +143,8 @@
     LOG(ERROR) << "Unable to initialize runtime";
     return false;
   }
+  std::unique_ptr<Runtime> runtime(Runtime::Current());
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
@@ -286,6 +289,13 @@
       return false;
     }
   }
+
+  if (!kIsDebugBuild && !(RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks)) {
+    // We want to just exit on non-debug builds, not bringing the runtime down
+    // in an orderly fashion. So release the runtime and deliberately leak it.
+    runtime.release();
+  }
+
   return true;
 }
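The release() above is an intentional leak: on release builds the process exits immediately afterwards, the OS reclaims everything, and an orderly (slow) runtime shutdown buys nothing. The pattern as a sketch with illustrative names:

    #include <memory>

    struct Runtime { ~Runtime() { /* slow, orderly shutdown */ } };

    void MaybeSkipShutdown(std::unique_ptr<Runtime>& runtime,
                           bool debug_build,
                           bool leak_checker_active) {
      if (!debug_build && !leak_checker_active) {
        runtime.release();  // leak on purpose; the destructor never runs
      }
      // Otherwise the unique_ptr destroys the runtime when it goes out of
      // scope, keeping debug and leak-checking builds clean.
    }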
 
diff --git a/runtime/Android.bp b/runtime/Android.bp
index cff2cbc..1869968 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -57,6 +57,7 @@
         "dex_file_annotations.cc",
         "dex_file_verifier.cc",
         "dex_instruction.cc",
+        "dex_to_dex_decompiler.cc",
         "elf_file.cc",
         "exec_utils.cc",
         "fault_handler.cc",
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index a857976..1a5e39f0 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -71,6 +71,11 @@
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
+#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
 }  // namespace arm
 
 namespace arm64 {
@@ -83,6 +88,11 @@
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
+#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
 }  // namespace arm64
 
 namespace mips {
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index c03bcae..f1f1766 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -24,6 +24,28 @@
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
 #define FRAME_SIZE_SAVE_EVERYTHING 192
 
+// The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
+// i.e. art_quick_read_barrier_mark_introspection_arrays.
+#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
+// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint,
+// i.e. art_quick_read_barrier_mark_introspection_gc_roots.
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0
+
+// The offset of the reference load LDR from the return address in LR for field loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8
+#else
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4
+#endif
+// The offset of the reference load LDR from the return address in LR for array loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8
+#else
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4
+#endif
+// The offset of the reference load LDR from the return address in LR for GC root loads.
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8
+
 // Flag for enabling R4 optimization in arm runtime
 // #define ARM_R4_SUSPEND_FLAG
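These LDR offsets let the slow path recover the destination register of the reference load: LR holds the return address with the Thumb bit set, the LDR sits at a fixed offset before it, and for a 32-bit Thumb2 LDR the Rt field is bits 12-15 of the second halfword. A C++ restatement of the ldrh/lsr sequence in BRBMI_SLOW_PATH, illustrative only since it reads code memory:

    #include <cstdint>

    uint32_t ExtractRefReg(uintptr_t lr, intptr_t ldr_offset) {
      // -1 strips the Thumb mode bit; +2 selects the LDR's second halfword.
      const uintptr_t rt_halfword_addr = lr - 1 + ldr_offset + 2;
      return *reinterpret_cast<const uint16_t*>(rt_halfword_addr) >> 12;
    }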
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index de72d3a..6b72477 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -17,6 +17,7 @@
 #include <math.h>
 #include <string.h>
 
+#include "arch/arm/asm_support_arm.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -51,6 +52,10 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
 
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
+
 // Used by soft float.
 // Single-precision FP arithmetics.
 extern "C" float fmodf(float a, float b);              // REM_FLOAT[_2ADDR]
@@ -67,19 +72,35 @@
 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
-  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
-  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
-  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
-  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
-  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
-  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
+  qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
+
+  // Check that array switch cases are at appropriate offsets from the introspection entrypoint.
+  // For the alignment check, strip the Thumb mode bit.
+  DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u);
+  intptr_t array_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff);
+  // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint.
+  intptr_t gc_roots_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff);
+  // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
+  // We're using the entry to hold a pointer to the introspection entrypoint instead.
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
@@ -138,7 +159,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
   qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
   qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 029de46..fa21208 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -872,7 +872,7 @@
     POISON_HEAP_REF r2
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
-    lsr r0, r0, #7
+    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
     strb r3, [r3, r0]
     blx lr
 .Ldo_aput_null:
@@ -900,7 +900,7 @@
     POISON_HEAP_REF r2
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
-    lsr r0, r0, #7
+    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
     strb r3, [r3, r0]
     blx lr
 .Lthrow_array_store_exception:
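The two hunks above only replace the literal shift 7 with the named CARD_TABLE_CARD_SHIFT. Restated as a C++ sketch of the scheme: the card table base is pre-biased so that base[addr >> shift] addresses the card for addr and the low byte of the base itself is the dirty value, which is why the asm can store r3 into its own table with strb r3, [r3, r0]:

    #include <cstdint>

    // Dirty the card covering obj_addr; card_shift is 7 here (128-byte cards).
    void MarkCard(uint8_t* biased_base,
                  uintptr_t obj_addr,
                  unsigned card_shift) {
      biased_base[obj_addr >> card_shift] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_base));
    }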
@@ -2146,6 +2146,216 @@
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
 
+// Helper macros for Baker CC read barrier mark introspection (BRBMI).
+.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
+    \macro_for_register r0
+    \macro_for_register r1
+    \macro_for_register r2
+    \macro_for_register r3
+    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
+    \macro_for_register r5
+    \macro_for_register r6
+    \macro_for_register r7
+    \macro_for_register r8
+    \macro_for_register r9
+    \macro_for_register r10
+    \macro_for_register r11
+.endm
+
+.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
+    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
+    \macro_for_reserved_register  // IP is reserved.
+    \macro_for_reserved_register  // SP is reserved.
+    \macro_for_reserved_register  // LR is reserved.
+    \macro_for_reserved_register  // PC is reserved.
+.endm
+
+.macro BRBMI_RETURN_SWITCH_CASE reg
+.Lmark_introspection_return_switch_case_\reg:
+    mov     \reg, ip
+    bx      lr
+.endm
+
+.macro BRBMI_BAD_RETURN_SWITCH_CASE
+.Lmark_introspection_return_switch_case_bad:
+    BRBMI_BKPT_FILL_4B
+.endm
+
+.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
+    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
+.endm
+
+.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
+    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
+.endm
+
+#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#error "Array and field introspection code sharing requires same LDR offset."
+#endif
+.macro BRBMI_ARRAY_LOAD index_reg
+    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
+    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
+    .balign 8                                           // Add padding to 8 bytes.
+.endm
+
+.macro BRBMI_BKPT_FILL_4B
+    bkpt    0
+    bkpt    0
+.endm
+
+.macro BRBMI_BKPT_FILL_8B
+    BRBMI_BKPT_FILL_4B
+    BRBMI_BKPT_FILL_4B
+.endm
+
+.macro BRBMI_SLOW_PATH ldr_offset
+    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r7, 16
+    .cfi_rel_offset lr, 20
+
+    mov     r0, ip                    // Pass the reference.
+    vpush {s0-s15}                    // Save floating-point caller-save registers.
+    .cfi_adjust_cfa_offset 64
+    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
+    vpop    {s0-s15}                  // Restore floating-point caller-save registers.
+    .cfi_adjust_cfa_offset -64
+    mov     ip, r0                    // Move reference to ip in preparation for return switch.
+
+    pop     {r0-r3, r7, lr}           // Restore registers.
+    .cfi_adjust_cfa_offset -24
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r7
+    .cfi_restore lr
+
+    // Load the half of the instruction that contains Rt. Adjust for the Thumb state in LR.
+    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
+    lsr     r4, r4, #12               // Extract `ref_reg`.
+    b       .Lmark_introspection_return_switch
+.endm
+
+    /*
+     * Use introspection to load a reference from the same address as the LDR
+     * instruction in generated code would load (unless loaded by the thunk,
+     * see below), call ReadBarrier::Mark() with that reference if needed
+     * and return it in the same register as the LDR instruction would load.
+     *
+     * The entrypoint is called through a thunk that differs across load kinds.
+     * For field and array loads the LDR instruction in generated code follows
+     * the branch to the thunk, i.e. the LDR is at [LR, #(-4 - 1)] where the -1
+     * is an adjustment for the Thumb mode bit in LR, and the thunk knows the
+     * holder and performs the gray bit check, returning to the LDR instruction
+     * if the object is not gray, so this entrypoint no longer needs to know
+     * anything about the holder. For GC root loads, the LDR instruction in
+     * generated code precedes the branch to the thunk, i.e. the LDR is at
+     * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and
+     * the thunk does not do the gray bit check.
+     *
+     * For field accesses and array loads with a constant index the thunk loads
+     * the reference into IP using introspection and calls the main entrypoint,
+     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
+     * the passed reference is poisoned.
+     *
+     * For array accesses with a non-constant index, the thunk inserts bits
+     * 0-5 of the LDR instruction into the entrypoint address, effectively
+     * calculating a switch case label based on the index register (bits 0-3)
+     * and adding an extra offset (bits 4-5 hold the shift which is always 2
+     * for reference loads) to differentiate from the main entrypoint, then
+     * moves the base register to IP and jumps to the switch case. Therefore
+     * we need to align the main entrypoint to 512 bytes, accounting for
+     * a 256-byte offset followed by 16 array entrypoints starting at
+     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
+     * (register) and a branch to the main entrypoint.
+     *
+     * For GC root accesses we cannot use the main entrypoint because of the
+     * different offset where the LDR instruction in generated code is located.
+     * (And even with heap poisoning enabled, GC roots are not poisoned.)
+     * To re-use the same entrypoint pointer in generated code, we make sure
+     * that the gc root entrypoint (a copy of the entrypoint with a different
+     * offset for introspection loads) is located at a known offset (192 bytes,
+     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
+     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
+     * the root register to IP and jumps to the customized entrypoint,
+     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
+     * performs all the fast-path checks, so we need just the slow path.
+     *
+     * The code structure is
+     *   art_quick_read_barrier_mark_introspection:
+     *     Over 128 bytes for the main entrypoint code.
+     *     Padding to 192 bytes if needed.
+     *   art_quick_read_barrier_mark_introspection_gc_roots:
+     *     GC root entrypoint code.
+     *     Padding to 256 bytes if needed.
+     *   art_quick_read_barrier_mark_introspection_arrays:
+     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
+     */
+    .balign 512
+ENTRY art_quick_read_barrier_mark_introspection
+    // At this point, IP contains the reference, R4 can be freely used.
+    // (R4 is reserved for the entrypoint address.)
+    // For heap poisoning, the reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF ip
+    // If reference is null, just return it in the right register.
+    cmp     ip, #0
+    beq     .Lmark_introspection_return
+    // Use R4 as temp and check the mark bit of the reference.
+    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq     .Lmark_introspection_unmarked
+.Lmark_introspection_return:
+    // Load the half of the instruction that contains Rt. Adjust for the Thumb state in LR.
+    ldrh    r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)]
+    lsr     r4, r4, #12               // Extract `ref_reg`.
+.Lmark_introspection_return_switch:
+    tbb     [pc, r4]                  // Jump to the switch case.
+.Lmark_introspection_return_table:
+    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
+    .balign 16
+    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
+
+    .balign 16
+.Lmark_introspection_unmarked:
+    // Check if the top two bits are one, if this is the case it is a forwarding address.
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
+    // the highest bits and the "forwarding address" state to have all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    bhs     .Lmark_introspection_forwarding_address
+    BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
+
+    .balign 8
+.Lmark_introspection_forwarding_address:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    b       .Lmark_introspection_return
+
+    .balign 64
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_gc_roots, #function
+    .hidden art_quick_read_barrier_mark_introspection_gc_roots
+    .global art_quick_read_barrier_mark_introspection_gc_roots
+art_quick_read_barrier_mark_introspection_gc_roots:
+    BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
+
+    .balign 256
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_arrays, #function
+    .hidden art_quick_read_barrier_mark_introspection_arrays
+    .global art_quick_read_barrier_mark_introspection_arrays
+art_quick_read_barrier_mark_introspection_arrays:
+    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
+END art_quick_read_barrier_mark_introspection
+
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
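A note on the introspection entrypoint added above: the 512-byte alignment, the 256-byte array-case offset, and the 8-byte switch cases are not independent choices. The thunk splices bits 0-5 of the LDR (register) encoding into the entrypoint address, and the constants are picked so that the splice lands exactly on the right switch case. A sketch of the arithmetic under the constants stated in the comment block (function name illustrative):

    // Why inserting LDR bits 0-5 at bit position 3 of a 512-byte-aligned
    // entrypoint address selects the correct array switch case.
    uintptr_t ArraySwitchCase(uintptr_t entrypoint, uint32_t ldr_encoding) {
      // Bits 0-3: index register Rm; bits 4-5: shift, always 2 for refs.
      uint32_t low6 = ldr_encoding & 0x3fu;      // == (2 << 4) | rm == 32 + rm
      // Each of the 16 cases is 8 bytes (LDR (register) plus a branch), so
      // (32 + rm) << 3 == 256 + rm * 8: the case offset within the block.
      // The 512-byte alignment guarantees bits 3-8 of the base are zero,
      // so this addition is the same as bit insertion.
      return entrypoint + (static_cast<uintptr_t>(low6) << 3);
    }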
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index bc7bcb1..610cdee 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -75,7 +75,7 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
 
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
   // ARM64 is the architecture with the largest number of core
   // registers (32) that supports the read barrier configuration.
   // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
@@ -85,35 +85,35 @@
   // have less core registers (resp. 16, 8 and 16).  (We may have to
   // revise that design choice if read barrier support is added for
   // MIPS and/or MIPS64.)
-  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
-  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
-  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
-  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
-  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
-  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
-  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
-  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
-  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
-  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
-  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
-  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
-  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
-  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
-  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
-  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
-  qpoints->pReadBarrierMarkReg23 = is_marking ? art_quick_read_barrier_mark_reg23 : nullptr;
-  qpoints->pReadBarrierMarkReg24 = is_marking ? art_quick_read_barrier_mark_reg24 : nullptr;
-  qpoints->pReadBarrierMarkReg25 = is_marking ? art_quick_read_barrier_mark_reg25 : nullptr;
-  qpoints->pReadBarrierMarkReg26 = is_marking ? art_quick_read_barrier_mark_reg26 : nullptr;
-  qpoints->pReadBarrierMarkReg27 = is_marking ? art_quick_read_barrier_mark_reg27 : nullptr;
-  qpoints->pReadBarrierMarkReg28 = is_marking ? art_quick_read_barrier_mark_reg28 : nullptr;
-  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+  qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg23 = is_active ? art_quick_read_barrier_mark_reg23 : nullptr;
+  qpoints->pReadBarrierMarkReg24 = is_active ? art_quick_read_barrier_mark_reg24 : nullptr;
+  qpoints->pReadBarrierMarkReg25 = is_active ? art_quick_read_barrier_mark_reg25 : nullptr;
+  qpoints->pReadBarrierMarkReg26 = is_active ? art_quick_read_barrier_mark_reg26 : nullptr;
+  qpoints->pReadBarrierMarkReg27 = is_active ? art_quick_read_barrier_mark_reg27 : nullptr;
+  qpoints->pReadBarrierMarkReg28 = is_active ? art_quick_read_barrier_mark_reg28 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr;
 
   // Check that array switch cases are at appropriate offsets from the introspection entrypoint.
   DCHECK_ALIGNED(art_quick_read_barrier_mark_introspection, 512u);
@@ -128,7 +128,7 @@
   DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff);
   // The register 16, i.e. IP0, is reserved, so there is no art_quick_read_barrier_mark_reg16.
   // We're using the entry to hold a pointer to the introspection entrypoint instead.
-  qpoints->pReadBarrierMarkReg16 = is_marking ? art_quick_read_barrier_mark_introspection : nullptr;
+  qpoints->pReadBarrierMarkReg16 = is_active ? art_quick_read_barrier_mark_introspection : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
@@ -188,7 +188,7 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
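The is_marking → is_active rename in this file (and its siblings below) is mechanical, but the underlying pattern deserves a note: a null entrypoint doubles as the "barrier inactive" marker, and the whole table of pReadBarrierMarkRegNN stubs is swapped in or out at once. A minimal sketch of the toggle; the caller shown is hypothetical, only UpdateReadBarrierEntrypoints itself appears in this change:

    // Hypothetical caller: re-point an entrypoint table when the read
    // barrier becomes active or inactive.
    void OnReadBarrierStateChanged(QuickEntryPoints* qpoints, bool now_active) {
      UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ now_active);
    }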
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d043962..c555126 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1416,7 +1416,7 @@
     POISON_HEAP_REF w2
     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
-    lsr x0, x0, #7
+    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
     strb w3, [x3, x0]
     ret
 .Ldo_aput_null:
@@ -1447,7 +1447,7 @@
     POISON_HEAP_REF w2
     str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
-    lsr x0, x0, #7
+    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
     strb w3, [x3, x0]
     ret
     .cfi_restore_state            // Reset unwind info so following code unwinds.
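Swapping the literal 7 for CARD_TABLE_CARD_SHIFT here (and in the MIPS/x86 stubs below) is a pure readability change: the shift value is still 7, i.e. 128-byte cards. The byte stored is the low byte of the card-table base register itself, which ART arranges to equal the "card dirty" value, so no extra constant load is needed. In C++ terms, a sketch:

    // What the aput card-marking tails compute (constants as used above).
    inline void MarkCard(uint8_t* biased_card_table, uintptr_t dest_object) {
      size_t card_index = dest_object >> 7;  // CARD_TABLE_CARD_SHIFT
      // The biased base is positioned so its low byte equals the dirty
      // marker; storing the base register's low byte dirties the card.
      biased_card_table[card_index] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table));
    }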
diff --git a/runtime/arch/context-inl.h b/runtime/arch/context-inl.h
new file mode 100644
index 0000000..ddcbbb1
--- /dev/null
+++ b/runtime/arch/context-inl.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is special-purpose for cases where you want a stack context. Most users should use
+// Context::Create().
+
+#include "context.h"
+
+#ifndef ART_RUNTIME_ARCH_CONTEXT_INL_H_
+#define ART_RUNTIME_ARCH_CONTEXT_INL_H_
+
+#if defined(__arm__)
+#include "arm/context_arm.h"
+#define RUNTIME_CONTEXT_TYPE arm::ArmContext
+#elif defined(__aarch64__)
+#include "arm64/context_arm64.h"
+#define RUNTIME_CONTEXT_TYPE arm64::Arm64Context
+#elif defined(__mips__) && !defined(__LP64__)
+#include "mips/context_mips.h"
+#define RUNTIME_CONTEXT_TYPE mips::MipsContext
+#elif defined(__mips__) && defined(__LP64__)
+#include "mips64/context_mips64.h"
+#define RUNTIME_CONTEXT_TYPE mips64::Mips64Context
+#elif defined(__i386__)
+#include "x86/context_x86.h"
+#define RUNTIME_CONTEXT_TYPE x86::X86Context
+#elif defined(__x86_64__)
+#include "x86_64/context_x86_64.h"
+#define RUNTIME_CONTEXT_TYPE x86_64::X86_64Context
+#else
+#error unimplemented
+#endif
+
+namespace art {
+
+using RuntimeContextType = RUNTIME_CONTEXT_TYPE;
+
+}  // namespace art
+
+#undef RUNTIME_CONTEXT_TYPE
+
+#endif  // ART_RUNTIME_ARCH_CONTEXT_INL_H_
diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc
index bf40a3f..82d8b6c 100644
--- a/runtime/arch/context.cc
+++ b/runtime/arch/context.cc
@@ -14,43 +14,12 @@
  * limitations under the License.
  */
 
-#include "context.h"
-
-#if defined(__arm__)
-#include "arm/context_arm.h"
-#elif defined(__aarch64__)
-#include "arm64/context_arm64.h"
-#elif defined(__mips__) && !defined(__LP64__)
-#include "mips/context_mips.h"
-#elif defined(__mips__) && defined(__LP64__)
-#include "mips64/context_mips64.h"
-#elif defined(__i386__)
-#include "x86/context_x86.h"
-#elif defined(__x86_64__)
-#include "x86_64/context_x86_64.h"
-#else
-#include "base/logging.h"
-#endif
+#include "context-inl.h"
 
 namespace art {
 
 Context* Context::Create() {
-#if defined(__arm__)
-  return new arm::ArmContext();
-#elif defined(__aarch64__)
-  return new arm64::Arm64Context();
-#elif defined(__mips__) && !defined(__LP64__)
-  return new mips::MipsContext();
-#elif defined(__mips__) && defined(__LP64__)
-  return new mips64::Mips64Context();
-#elif defined(__i386__)
-  return new x86::X86Context();
-#elif defined(__x86_64__)
-  return new x86_64::X86_64Context();
-#else
-  UNIMPLEMENTED(FATAL);
-  return nullptr;
-#endif
+  return new RuntimeContextType;
 }
 
 }  // namespace art
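With context-inl.h in place, the per-architecture #if ladder lives in one header and Context::Create() shrinks to a single new-expression. Callers that previously had to heap-allocate can now also put a context on the stack; a usage sketch (function name illustrative):

    #include "arch/context-inl.h"

    void StackContextExample() {
      // Concrete per-arch context type, no heap allocation needed.
      art::RuntimeContextType context;
      // ... pass &context wherever a Context* is expected ...
    }
    // Heap allocation still works as before: art::Context::Create().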
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 434e33c..9978da5 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -86,68 +86,68 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
                 "Non-direct C stub marked direct.");
 }
@@ -160,7 +160,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false);
+  ResetQuickAllocEntryPoints(qpoints, /*is_active*/ false);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
@@ -412,7 +412,7 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   // Cannot use the following registers to pass arguments:
   // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
   // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8).
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 722a679..7bbcbf0 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -421,7 +421,7 @@
     SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
 .endm
 
-.macro RESTORE_SAVE_EVERYTHING_FRAME
+.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
 
@@ -490,8 +490,10 @@
     .cfi_restore 6
     lw     $a1, 160($sp)
     .cfi_restore 5
+    .if \restore_a0
     lw     $a0, 156($sp)
     .cfi_restore 4
+    .endif
     lw     $v1, 152($sp)
     .cfi_restore 3
     lw     $v0, 148($sp)
@@ -507,16 +509,26 @@
 .endm
 
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
-     * exception is Thread::Current()->exception_
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_. Assumes the runtime method frame has
+     * already been set up ("frame ready"). Requires $gp properly set up.
      */
-.macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
     la      $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
 .endm
 
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     * Requires $gp properly set up.
+     */
+.macro DELIVER_PENDING_EXCEPTION
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
 .macro RETURN_IF_NO_EXCEPTION
     lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     RESTORE_SAVE_REFS_ONLY_FRAME
@@ -1406,7 +1418,7 @@
     POISON_HEAP_REF $a2
     sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
-    srl $t1, $a0, 7
+    srl $t1, $a0, CARD_TABLE_CARD_SHIFT
     add $t1, $t1, $t0
     sb  $t0, ($t1)
     jalr $zero, $ra
@@ -1660,30 +1672,51 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 
+// Macro for string and type resolution and initialization.
+// $a0 is both input and output.
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+    .extern \entrypoint
+ENTRY_NO_GP \name
+    SETUP_SAVE_EVERYTHING_FRAME       # Save everything in case of GC.
+    move    $s2, $gp                  # Preserve $gp across the call for exception delivery.
+    la      $t9, \entrypoint
+    jalr    $t9                       # (uint32_t index, Thread*)
+    move    $a1, rSELF                # Pass Thread::Current (in delay slot).
+    beqz    $v0, 1f                   # Success?
+    move    $a0, $v0                  # Move result to $a0 (in delay slot).
+    RESTORE_SAVE_EVERYTHING_FRAME 0   # Restore everything except $a0.
+    jalr    $zero, $ra                # Return on success.
+    nop
+1:
+    move    $gp, $s2
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END \name
+.endm
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
      * path check for hit in strings cache has already been performed.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
      * miss.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
@@ -1854,7 +1887,8 @@
     nop
 
 2:
-    lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    lw      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    move    $gp, $s3               # restore $gp from $s3
     # This will create a new save-all frame, required by the runtime.
     DELIVER_PENDING_EXCEPTION
 END art_quick_generic_jni_trampoline
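The ONE_ARG_SAVE_EVERYTHING_DOWNCALL macro introduced above changes how the four resolution/initialization stubs call into the runtime: they save every register (so a GC triggered by the call can visit and update references held anywhere), keep the result in $a0 on success, and on failure deliver the pending exception reusing the frame that is already built. Roughly, in C-like form — the frame/exception helpers are hypothetical stand-ins for the asm macros, and the runtime signature is abridged from the macro's "(uint32_t index, Thread*)" comment:

    #include <cstdint>
    namespace art { class Thread; }
    extern "C" void* artResolveStringFromCode(uint32_t string_idx, art::Thread* self);
    [[noreturn]] void DeliverPendingExceptionFrameReady(art::Thread* self);

    void* ResolveStringStubSketch(uint32_t string_idx, art::Thread* self) {
      // SETUP_SAVE_EVERYTHING_FRAME: spill all registers for the GC.
      void* result = artResolveStringFromCode(string_idx, self);
      if (result != nullptr) {
        // RESTORE_SAVE_EVERYTHING_FRAME 0: restore all registers except
        // $a0, which already holds the result.
        return result;
      }
      // The save-everything frame is already built, so deliver directly.
      DeliverPendingExceptionFrameReady(self);
    }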
@@ -2213,8 +2247,32 @@
      */
 .macro READ_BARRIER_MARK_REG name, reg
 ENTRY \name
-    /* TODO: optimizations: mark bit, forwarding. */
-    addiu   $sp, $sp, -160      # includes 16 bytes of space for argument registers a0-a3
+    // Null check so that we can load the lock word.
+    bnez    \reg, .Lnot_null_\name
+    nop
+.Lret_rb_\name:
+    jalr    $zero, $ra
+    nop
+.Lnot_null_\name:
+    // Check lock word for mark bit, if marked return.
+    lw      $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg)
+    .set push
+    .set noat
+    sll     $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT     # Move mark bit to sign bit.
+    bltz    $at, .Lret_rb_\name
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+    // The below code depends on the lock word state being in the highest bits
+    // and the "forwarding address" state having all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+    // Test that both the forwarding state bits are 1.
+    sll     $at, $t9, 1
+    and     $at, $at, $t9                               # Sign bit = 1 IFF both bits are 1.
+    bltz    $at, .Lret_forwarding_address\name
+    nop
+    .set pop
+
+    addiu   $sp, $sp, -160      # Includes 16 bytes of space for argument registers a0-a3.
     .cfi_adjust_cfa_offset 160
 
     sw      $ra, 156($sp)
@@ -2319,6 +2377,12 @@
     jalr    $zero, $ra
     addiu   $sp, $sp, 160
     .cfi_adjust_cfa_offset -160
+
+.Lret_forwarding_address\name:
+    jalr    $zero, $ra
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    sll     \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
 END \name
 .endm
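The READ_BARRIER_MARK_REG rewrite above replaces the unconditional register spill with two lock-word fast paths that avoid the runtime call entirely. The sign-bit shifts in the assembly implement exactly these tests; in C++ form, with LoadLockWord, DecodeForwardingAddress, and SlowPathMark as hypothetical stand-ins:

    uint32_t lw = LoadLockWord(ref);        // lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(reg)
    if ((lw >> LOCK_WORD_MARK_BIT_SHIFT) & 1u) {
      return ref;                           // mark bit set: already marked
    }
    // State bits are bits 31:30 and "forwarding address" means both are 1.
    // The asm tests sign(lw & (lw << 1)), set iff bits 31 and 30 both are.
    if ((lw & (lw << 1)) & 0x80000000u) {
      return DecodeForwardingAddress(lw);   // lw << LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    }
    return SlowPathMark(ref);               // only now spill registers and call the runtime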
 
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index f8242ae..763d93e 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -86,27 +86,27 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 // No read barrier entrypoints for marking registers.
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
-  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
-  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
-  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
-  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
-  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
-  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
-  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
-  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
-  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
-  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
-  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
-  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
-  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
-  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
@@ -168,7 +168,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   // Cannot use the following registers to pass arguments:
   // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
   // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8).
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 9402232..8f713a1 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -447,7 +447,7 @@
     SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
 .endm
 
-.macro RESTORE_SAVE_EVERYTHING_FRAME
+.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1
     // Restore FP registers.
     l.d    $f31, 264($sp)
     l.d    $f30, 256($sp)
@@ -530,8 +530,10 @@
     .cfi_restore 6
     ld     $a1,  304($sp)
     .cfi_restore 5
+    .if \restore_a0
     ld     $a0,  296($sp)
     .cfi_restore 4
+    .endif
     ld     $v1,  288($sp)
     .cfi_restore 3
     ld     $v0,  280($sp)
@@ -547,16 +549,24 @@
 .endm
 
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode,
-     * where the pending
-     * exception is Thread::Current()->exception_
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_. Assumes the runtime method frame has
+     * already been set up ("frame ready"). Requires $gp properly set up.
+     */
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
+    dla     $t9, artDeliverPendingExceptionFromCode
+    jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
+    move    $a0, rSELF                   # pass Thread::Current
+.endm
+
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
      */
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
-    dla     $t9, artDeliverPendingExceptionFromCode
-    jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
-    move    $a0, rSELF                   # pass Thread::Current
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
 .endm
 
 .macro RETURN_IF_NO_EXCEPTION
@@ -1374,7 +1384,7 @@
     POISON_HEAP_REF $a2
     sw   $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     ld   $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
-    dsrl  $t1, $a0, 7
+    dsrl  $t1, $a0, CARD_TABLE_CARD_SHIFT
     daddu $t1, $t1, $t0
     sb   $t0, ($t1)
     jalr $zero, $ra
@@ -1615,30 +1625,48 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 
+// Macro for string and type resolution and initialization.
+// $a0 is both input and output.
+.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
+    .extern \entrypoint
+ENTRY_NO_GP \name
+    SETUP_SAVE_EVERYTHING_FRAME       # Save everything in case of GC.
+    dla     $t9, \entrypoint
+    jalr    $t9                       # (uint32_t index, Thread*)
+    move    $a1, rSELF                # Pass Thread::Current (in delay slot).
+    beqz    $v0, 1f                   # Success?
+    move    $a0, $v0                  # Move result to $a0 (in delay slot).
+    RESTORE_SAVE_EVERYTHING_FRAME 0   # Restore everything except $a0.
+    jic     $ra, 0                    # Return on success.
+1:
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END \name
+.endm
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
      * path check for hit in strings cache has already been performed.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
      * miss.
      */
-ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
@@ -2067,7 +2095,29 @@
      */
 .macro READ_BARRIER_MARK_REG name, reg
 ENTRY \name
-    /* TODO: optimizations: mark bit, forwarding. */
+    // Null check so that we can load the lock word.
+    bnezc   \reg, .Lnot_null_\name
+    nop
+.Lret_rb_\name:
+    jic     $ra, 0
+.Lnot_null_\name:
+    // Check lock word for mark bit, if marked return.
+    lw      $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg)
+    .set push
+    .set noat
+    sll     $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT     # Move mark bit to sign bit.
+    bltzc   $at, .Lret_rb_\name
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+    // The below code depends on the lock word state being in the highest bits
+    // and the "forwarding address" state having all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+    // Test that both the forwarding state bits are 1.
+    sll     $at, $t9, 1
+    and     $at, $at, $t9                               # Sign bit = 1 IFF both bits are 1.
+    bltzc   $at, .Lret_forwarding_address\name
+    .set pop
+
     daddiu  $sp, $sp, -320
     .cfi_adjust_cfa_offset 320
 
@@ -2202,6 +2252,13 @@
     jalr    $zero, $ra
     daddiu  $sp, $sp, 320
     .cfi_adjust_cfa_offset -320
+
+.Lret_forwarding_address\name:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    sll     \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    jalr    $zero, $ra
+    dext    \reg, \reg, 0, 32   # Make sure the address is zero-extended.
 END \name
 .endm
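One MIPS64-specific wrinkle in the forwarding-address tail above: sll produces a sign-extended 32-bit result on MIPS64, so the decoded address needs the trailing dext to be usable as a 64-bit pointer. The C++ equivalent of the added instruction pair:

    // References are 32-bit; zero-extend after the 32-bit shift, mirroring
    // "sll \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT; dext \reg, \reg, 0, 32".
    uint64_t ref64 =
        static_cast<uint32_t>(lock_word << LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT);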
 
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 9cd4a3e..102faf1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -44,14 +44,14 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
-  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
+  qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
@@ -97,7 +97,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
   // x86 has only 8 core registers.
   qpoints->pReadBarrierMarkReg08 = nullptr;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6c0bcc9..3694c3e 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1526,7 +1526,7 @@
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
-    shrl LITERAL(7), %eax
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
     movb %dl, (%edx, %eax)
     ret
 .Ldo_aput_null:
@@ -1567,7 +1567,7 @@
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
-    shrl LITERAL(7), %eax
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
     movb %dl, (%edx, %eax)
     ret
     CFI_ADJUST_CFA_OFFSET(12)     // 3 POP after the jz for unwinding.
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index a326b4e..1e56e8a 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -55,22 +55,22 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
-  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
-  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
-  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
-  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
-  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
-  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
-  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
-  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
-  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
-  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
-  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
-  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
-  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
-  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
-  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
+  qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
@@ -119,7 +119,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
   // x86-64 has only 16 core registers.
   qpoints->pReadBarrierMarkReg16 = nullptr;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8e2acab..ad7c2b3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1504,8 +1504,8 @@
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
-    shrl LITERAL(7), %edi
-//  shrl LITERAL(7), %rdi
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
+//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
     ret
 .Ldo_aput_null:
@@ -1545,8 +1545,8 @@
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
-    shrl LITERAL(7), %edi
-//  shrl LITERAL(7), %rdi
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
+//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
 //  movb %dl, (%rdx, %rdi)
     ret
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 935fd81..136ed12 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -33,6 +33,7 @@
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
+  // Every name should have the same width and end with a space. Abbreviate if necessary:
   "Misc         ",
   "SwitchTbl    ",
   "SlowPaths    ",
@@ -49,6 +50,7 @@
   "Successors   ",
   "Dominated    ",
   "Instruction  ",
+  "CtorFenceIns ",
   "InvokeInputs ",
   "PhiInputs    ",
   "LoopInfo     ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index c39429c..60b6ea8 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -59,6 +59,7 @@
   kArenaAllocSuccessors,
   kArenaAllocDominated,
   kArenaAllocInstruction,
+  kArenaAllocConstructorFenceInputs,
   kArenaAllocInvokeInputs,
   kArenaAllocPhiInputs,
   kArenaAllocLoopInfo,
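These two hunks must move in lockstep: kArenaAllocConstructorFenceInputs is inserted into the enum at the same position where "CtorFenceIns " is inserted into kAllocNames. Assuming the enum keeps a kNumArenaAllocKinds terminator (not shown in this hunk), a guard of this shape catches a mismatch at compile time:

    static_assert(arraysize(kAllocNames) == kNumArenaAllocKinds,
                  "Arena alloc names must match the ArenaAllocKind enum.");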
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index 6b67864..c5b0af6 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -98,7 +98,9 @@
       // Check that the value is within the upper limit of Dest.
       (static_cast<uintmax_t>(std::numeric_limits<Dest>::max()) >=
           static_cast<uintmax_t>(std::numeric_limits<Source>::max()) ||
-          source <= static_cast<Source>(std::numeric_limits<Dest>::max())));
+          source <= static_cast<Source>(std::numeric_limits<Dest>::max())))
+      << "dchecked_integral_cast failed for " << source
+      << " (would be " << static_cast<Dest>(source) << ")";
 
   return static_cast<Dest>(source);
 }
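The added stream operators turn a bare CHECK failure into an actionable message that shows both the offending value and what it would have become. For example (values illustrative):

    // Narrowing 2^40 into int32_t trips the upper-limit check; debug builds
    // now abort with "dchecked_integral_cast failed for 1099511627776 (would be 0)".
    int32_t narrowed = art::dchecked_integral_cast<int32_t>(int64_t{1} << 40);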
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 2414b5f..03ae63a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -373,19 +373,19 @@
   bool IsSharedHeld(const Thread* self) const;
 
   // Assert the current thread has shared access to the ReaderWriterMutex.
-  void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
+  ALWAYS_INLINE void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
     if (kDebugLocking && (gAborting == 0)) {
       // TODO: we can only assert this well when self != null.
       CHECK(IsSharedHeld(self) || self == nullptr) << *this;
     }
   }
-  void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
+  ALWAYS_INLINE void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
     AssertSharedHeld(self);
   }
 
   // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive
   // mode.
-  void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) {
+  ALWAYS_INLINE void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) {
     if (kDebugLocking && (gAborting == 0)) {
       CHECK(!IsSharedHeld(self)) << *this;
     }
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index d5f375a..cfe27f3 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -194,6 +194,17 @@
   to_update.insert(other.begin(), other.end());
 }
 
+// Returns a vector of non-owning raw pointers to the entries of the passed vector.
+template <typename T>
+static inline std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
+  std::vector<T*> result;
+  result.reserve(src.size());
+  for (const std::unique_ptr<T>& t : src) {
+    result.push_back(t.get());
+  }
+  return result;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
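A usage sketch for the new helper: borrow raw pointers from an owning vector without copying or transferring ownership. The borrowed pointers stay valid only while the owning vector keeps its entries alive:

    #include <memory>
    #include <vector>

    std::vector<std::unique_ptr<int>> owned;
    owned.push_back(std::make_unique<int>(42));
    std::vector<int*> borrowed = art::MakeNonOwningPointerVector(owned);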
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 56e8aa3..a29cc6c 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -249,6 +249,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \
+  if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \
+    printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index dc55ab8..4847f38 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -20,17 +20,15 @@
 
 namespace art {
 
-bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) {
+bool CompilerFilter::IsAotCompilationEnabled(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime:
-    case CompilerFilter::kVerifyProfile:
-    case CompilerFilter::kInterpretOnly: return false;
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken: return false;
 
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverythingProfile:
@@ -41,15 +39,13 @@
 
 bool CompilerFilter::IsJniCompilationEnabled(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime: return false;
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify: return false;
 
-    case CompilerFilter::kVerifyProfile:
-    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kQuicken:
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverythingProfile:
@@ -58,17 +54,15 @@
   UNREACHABLE();
 }
 
-bool CompilerFilter::IsAnyMethodCompilationEnabled(Filter filter) {
+bool CompilerFilter::IsQuickeningCompilationEnabled(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime:
-    case CompilerFilter::kVerifyProfile: return false;
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify: return false;
 
-    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kQuicken:
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverythingProfile:
@@ -77,17 +71,21 @@
   UNREACHABLE();
 }
 
+bool CompilerFilter::IsAnyCompilationEnabled(Filter filter) {
+  return IsJniCompilationEnabled(filter) ||
+      IsQuickeningCompilationEnabled(filter) ||
+      IsAotCompilationEnabled(filter);
+}
+
 bool CompilerFilter::IsVerificationEnabled(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime: return false;
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract: return false;
 
-    case CompilerFilter::kVerifyProfile:
-    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken:
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverythingProfile:
@@ -104,19 +102,14 @@
 
 bool CompilerFilter::DependsOnProfile(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime:
-    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverything: return false;
 
-    // verify-profile doesn't look at profiles anymore.
-    // TODO(ngeoffray): this will be cleaned up with b/34715556.
-    case CompilerFilter::kVerifyProfile: return false;
-
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kEverythingProfile: return true;
@@ -126,21 +119,15 @@
 
 CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone:
-    case CompilerFilter::kVerifyAtRuntime:
-    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken:
     case CompilerFilter::kSpace:
-    case CompilerFilter::kBalanced:
-    case CompilerFilter::kTime:
     case CompilerFilter::kSpeed:
     case CompilerFilter::kEverything:
       return filter;
 
-    case CompilerFilter::kVerifyProfile:
-      // verify-profile doesn't look at profiles anymore.
-      // TODO(ngeoffray): this will be cleaned up with b/34715556.
-      return filter;
-
     case CompilerFilter::kSpaceProfile:
       return CompilerFilter::kSpace;
 
@@ -153,6 +140,26 @@
   UNREACHABLE();
 }
 
+CompilerFilter::Filter CompilerFilter::GetSafeModeFilterFrom(Filter filter) {
+  // For safe mode, we should not return a filter that generates AOT compiled
+  // code.
+  switch (filter) {
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken:
+      return filter;
+
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverything:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kEverythingProfile:
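+      // Downgrade to quicken: it verifies and quickens dex bytecode but does not AOT-compile it.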
+      return CompilerFilter::kQuicken;
+  }
+  UNREACHABLE();
+}
 
 bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) {
   return current >= target;
@@ -160,14 +167,12 @@
 
 std::string CompilerFilter::NameOfFilter(Filter filter) {
   switch (filter) {
-    case CompilerFilter::kVerifyNone: return "verify-none";
-    case CompilerFilter::kVerifyAtRuntime: return "verify-at-runtime";
-    case CompilerFilter::kVerifyProfile: return "verify-profile";
-    case CompilerFilter::kInterpretOnly: return "interpret-only";
+    case CompilerFilter::kAssumeVerified: return "assume-verified";
+    case CompilerFilter::kExtract: return "extract";
+    case CompilerFilter::kVerify: return "verify";
+    case CompilerFilter::kQuicken: return "quicken";
     case CompilerFilter::kSpaceProfile: return "space-profile";
     case CompilerFilter::kSpace: return "space";
-    case CompilerFilter::kBalanced: return "balanced";
-    case CompilerFilter::kTime: return "time";
     case CompilerFilter::kSpeedProfile: return "speed-profile";
     case CompilerFilter::kSpeed: return "speed";
     case CompilerFilter::kEverythingProfile: return "everything-profile";
@@ -180,19 +185,41 @@
   CHECK(filter != nullptr);
 
   if (strcmp(option, "verify-none") == 0) {
-    *filter = kVerifyNone;
+    LOG(WARNING) << "'verify-none' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'assume-verified' instead.";
+    *filter = kAssumeVerified;
   } else if (strcmp(option, "interpret-only") == 0) {
-    *filter = kInterpretOnly;
+    LOG(WARNING) << "'interpret-only' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'quicken' instead.";
+    *filter = kQuicken;
   } else if (strcmp(option, "verify-profile") == 0) {
-    *filter = kVerifyProfile;
+    LOG(WARNING) << "'verify-profile' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'verify' instead.";
+    *filter = kVerify;
   } else if (strcmp(option, "verify-at-runtime") == 0) {
-    *filter = kVerifyAtRuntime;
+    LOG(WARNING) << "'verify-at-runtime' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'extract' instead.";
+    *filter = kExtract;
+  } else if (strcmp(option, "balanced") == 0) {
+    LOG(WARNING) << "'balanced' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'speed' instead.";
+    *filter = kSpeed;
+  } else if (strcmp(option, "time") == 0) {
+    LOG(WARNING) << "'time' is an obsolete compiler filter name that will be "
+                 << "removed in future releases, please use 'space' instead.";
+    *filter = kSpace;
+  } else if (strcmp(option, "assume-verified") == 0) {
+    *filter = kAssumeVerified;
+  } else if (strcmp(option, "extract") == 0) {
+    *filter = kExtract;
+  } else if (strcmp(option, "verify") == 0) {
+    *filter = kVerify;
+  } else if (strcmp(option, "quicken") == 0) {
+    *filter = kQuicken;
   } else if (strcmp(option, "space") == 0) {
     *filter = kSpace;
   } else if (strcmp(option, "space-profile") == 0) {
     *filter = kSpaceProfile;
-  } else if (strcmp(option, "balanced") == 0) {
-    *filter = kBalanced;
   } else if (strcmp(option, "speed") == 0) {
     *filter = kSpeed;
   } else if (strcmp(option, "speed-profile") == 0) {
@@ -201,8 +228,6 @@
     *filter = kEverything;
   } else if (strcmp(option, "everything-profile") == 0) {
     *filter = kEverythingProfile;
-  } else if (strcmp(option, "time") == 0) {
-    *filter = kTime;
   } else {
     return false;
   }
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index 796f4aa..f802439 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -30,14 +30,12 @@
   // Note: Order here matters. Later filter choices are considered "as good
   // as" earlier filter choices.
   enum Filter {
-    kVerifyNone,          // Skip verification but mark all classes as verified anyway.
-    kVerifyAtRuntime,     // Delay verication to runtime, do not compile anything.
-    kVerifyProfile,       // Verify only the classes in the profile, compile only JNI stubs.
-    kInterpretOnly,       // Verify everything, compile only JNI stubs.
-    kTime,                // Compile methods, but minimize compilation time.
+    kAssumeVerified,      // Skip verification but mark all classes as verified anyway.
+    kExtract,             // Delay verification to runtime, do not compile anything.
+    kVerify,              // Only verify classes.
+    kQuicken,             // Verify, quicken, and compile JNI stubs.
     kSpaceProfile,        // Maximize space savings based on profile.
     kSpace,               // Maximize space savings.
-    kBalanced,            // Good performance return on compilation investment.
     kSpeedProfile,        // Maximize runtime performance based on profile.
     kSpeed,               // Maximize runtime performance.
     kEverythingProfile,   // Compile everything capable of being compiled based on profile.
@@ -48,17 +46,21 @@
 
   // Returns true if an oat file with this compiler filter contains
   // compiled executable code for bytecode.
-  static bool IsBytecodeCompilationEnabled(Filter filter);
+  static bool IsAotCompilationEnabled(Filter filter);
 
   // Returns true if an oat file with this compiler filter contains
   // compiled executable code for bytecode, JNI methods, or quickened dex
   // bytecode.
-  static bool IsAnyMethodCompilationEnabled(Filter filter);
+  static bool IsAnyCompilationEnabled(Filter filter);
 
   // Returns true if an oat file with this compiler filter contains
   // compiled executable code for JNI methods.
   static bool IsJniCompilationEnabled(Filter filter);
 
+  // Returns true if an oat file with this compiler filter contains
+  // quickened dex bytecode.
+  static bool IsQuickeningCompilationEnabled(Filter filter);
+
   // Returns true if this compiler filter requires running verification.
   static bool IsVerificationEnabled(Filter filter);
 
@@ -73,6 +75,9 @@
   // Returns a non-profile-guided version of the given filter.
   static Filter GetNonProfileDependentFilterFrom(Filter filter);
 
+  // Returns a filter suitable for safe mode.
+  static Filter GetSafeModeFilterFrom(Filter filter);
+
   // Returns true if the 'current' compiler filter is considered at least as
   // good as the 'target' compilation type.
-  // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is
+  // For example: kSpeed is as good as kQuicken, but kQuicken is
diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc
index c603be6..383f4e3 100644
--- a/runtime/compiler_filter_test.cc
+++ b/runtime/compiler_filter_test.cc
@@ -28,19 +28,24 @@
   EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter));
 }
 
+static void TestSafeModeFilter(CompilerFilter::Filter expected, std::string name) {
+  CompilerFilter::Filter parsed;
+  EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed));
+  EXPECT_EQ(expected, CompilerFilter::GetSafeModeFilterFrom(parsed));
+}
+
 // Verify the dexopt status values from dalvik.system.DexFile
 // match the OatFileAssistant::DexOptStatus values.
 TEST(CompilerFilterTest, ParseCompilerFilter) {
   CompilerFilter::Filter filter;
 
-  TestCompilerFilterName(CompilerFilter::kVerifyNone, "verify-none");
-  TestCompilerFilterName(CompilerFilter::kVerifyAtRuntime, "verify-at-runtime");
-  TestCompilerFilterName(CompilerFilter::kVerifyProfile, "verify-profile");
-  TestCompilerFilterName(CompilerFilter::kInterpretOnly, "interpret-only");
-  TestCompilerFilterName(CompilerFilter::kTime, "time");
+  TestCompilerFilterName(CompilerFilter::kAssumeVerified, "assume-verified");
+  TestCompilerFilterName(CompilerFilter::kExtract, "extract");
+  TestCompilerFilterName(CompilerFilter::kVerify, "verify");
+  TestCompilerFilterName(CompilerFilter::kQuicken, "quicken");
   TestCompilerFilterName(CompilerFilter::kSpaceProfile, "space-profile");
   TestCompilerFilterName(CompilerFilter::kSpace, "space");
-  TestCompilerFilterName(CompilerFilter::kBalanced, "balanced");
   TestCompilerFilterName(CompilerFilter::kSpeedProfile, "speed-profile");
   TestCompilerFilterName(CompilerFilter::kSpeed, "speed");
   TestCompilerFilterName(CompilerFilter::kEverythingProfile, "everything-profile");
@@ -49,4 +54,17 @@
   EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter));
 }
 
+TEST(CompilerFilterTest, SafeModeFilter) {
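+  // Non-AOT filters are kept as-is; every AOT filter downgrades to quicken.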
+  TestSafeModeFilter(CompilerFilter::kAssumeVerified, "assume-verified");
+  TestSafeModeFilter(CompilerFilter::kExtract, "extract");
+  TestSafeModeFilter(CompilerFilter::kVerify, "verify");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "quicken");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "space-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "space");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "speed-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "speed");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "everything-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "everything");
+}
+
 }  // namespace art
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 63794bf..d0b50fe 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1092,6 +1092,23 @@
   return JDWP::ERR_NONE;
 }
 
+JDWP::JdwpError Dbg::GetSourceDebugExtension(JDWP::RefTypeId class_id,
+                                             std::string* extension_data) {
+  JDWP::JdwpError error;
+  mirror::Class* c = DecodeClass(class_id, &error);
+  if (c == nullptr) {
+    return error;
+  }
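+  // annotations::GetSourceDebugExtension takes a Handle, so wrap the decoded class in one.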
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::Class> klass(hs.NewHandle(c));
+  const char* data = annotations::GetSourceDebugExtension(klass);
+  if (data == nullptr) {
+    return JDWP::ERR_ABSENT_INFORMATION;
+  }
+  *extension_data = data;
+  return JDWP::ERR_NONE;
+}
+
 JDWP::JdwpError Dbg::GetSourceFile(JDWP::RefTypeId class_id, std::string* result) {
   JDWP::JdwpError error;
   mirror::Class* c = DecodeClass(class_id, &error);
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 27124e1..4f3ff40 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -288,6 +288,9 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   static JDWP::JdwpError GetSignature(JDWP::RefTypeId ref_type_id, std::string* signature)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  static JDWP::JdwpError GetSourceDebugExtension(JDWP::RefTypeId ref_type_id,
+                                                 std::string* extension_data)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   static JDWP::JdwpError GetSourceFile(JDWP::RefTypeId ref_type_id, std::string* source_file)
       REQUIRES_SHARED(Locks::mutator_lock_);
   static JDWP::JdwpError GetObjectTag(JDWP::ObjectId object_id, uint8_t* tag)
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index e58c6f5..6765407 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -42,7 +42,16 @@
     CommonRuntimeTest::SetUp();
 
     // Create a scratch directory to work from.
-    scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest";
+
+    // Get the realpath of the android data. The oat dir should always point to the real
+    // location when generating oat files in the dalvik cache. This avoids complicating the
+    // unit tests when matching the expected paths.
+    UniqueCPtr<const char[]> android_data_real(realpath(android_data_.c_str(), nullptr));
+    ASSERT_TRUE(android_data_real != nullptr)
+      << "Could not get the realpath of the android data" << android_data_ << strerror(errno);
+
+    scratch_dir_.assign(android_data_real.get());
+    scratch_dir_ += "/Dex2oatEnvironmentTest";
     ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
 
     // Create a subdirectory in scratch for odex files.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 625794e..eaf144b 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1044,7 +1044,7 @@
         }
 
         uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
-        uint32_t descriptor_idx = DecodeUnsignedLeb128P1(&stream);
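+        // Deliberately 16-bit: a local with no type decodes (P1) to -1, which must truncate to
+        // DexFile::kDexNoIndex16 so that later type/string lookups yield null instead of failing.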
+        uint16_t descriptor_idx = DecodeUnsignedLeb128P1(&stream);
         uint32_t signature_idx = kDexNoIndex;
         if (opcode == DBG_START_LOCAL_EXTENDED) {
           signature_idx = DecodeUnsignedLeb128P1(&stream);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1b18d21..36c7341 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -92,8 +92,8 @@
     uint32_t method_ids_off_;  // file offset of MethodIds array
     uint32_t class_defs_size_;  // number of ClassDefs
     uint32_t class_defs_off_;  // file offset of ClassDef array
-    uint32_t data_size_;  // unused
-    uint32_t data_off_;  // unused
+    uint32_t data_size_;  // size of data section
+    uint32_t data_off_;  // file offset of data section
 
     // Decode the dex magic version
     uint32_t GetVersion() const;
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 6b9654d..1397916 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -1135,7 +1135,7 @@
 bool GetParametersMetadataForMethod(ArtMethod* method,
                                     MutableHandle<mirror::ObjectArray<mirror::String>>* names,
                                     MutableHandle<mirror::IntArray>* access_flags) {
-  const DexFile::AnnotationSetItem::AnnotationSetItem* annotation_set =
+  const DexFile::AnnotationSetItem* annotation_set =
       FindAnnotationSetForMethod(method);
   if (annotation_set == nullptr) {
     return false;
@@ -1420,6 +1420,40 @@
   return GetSignatureValue(data, annotation_set);
 }
 
+const char* GetSourceDebugExtension(Handle<mirror::Class> klass) {
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet(
+      data.GetDexFile(),
+      annotation_set,
+      "Ldalvik/annotation/SourceDebugExtension;",
+      DexFile::kDexVisibilitySystem);
+  if (annotation_item == nullptr) {
+    return nullptr;
+  }
+  const uint8_t* annotation =
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value");
+  if (annotation == nullptr) {
+    return nullptr;
+  }
+  DexFile::AnnotationValue annotation_value;
+  if (!ProcessAnnotationValue<false>(data,
+                                     &annotation,
+                                     &annotation_value,
+                                     ScopedNullHandle<mirror::Class>(),
+                                     DexFile::kAllRaw)) {
+    return nullptr;
+  }
+  if (annotation_value.type_ != DexFile::kDexAnnotationString) {
+    return nullptr;
+  }
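+  // The annotation value is a string index; resolve it to the UTF-8 data in the dex file.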
+  dex::StringIndex index(static_cast<uint32_t>(annotation_value.value_.GetI()));
+  return data.GetDexFile().StringDataByIdx(index);
+}
+
 bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class) {
   ClassData data(klass);
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
diff --git a/runtime/dex_file_annotations.h b/runtime/dex_file_annotations.h
index c66c5bd..651c984 100644
--- a/runtime/dex_file_annotations.h
+++ b/runtime/dex_file_annotations.h
@@ -89,6 +89,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass)
     REQUIRES_SHARED(Locks::mutator_lock_);
+const char* GetSourceDebugExtension(Handle<mirror::Class> klass)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 bool IsClassAnnotationPresent(Handle<mirror::Class> klass,
                               Handle<mirror::Class> annotation_class)
     REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index f811287..6627550 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -171,6 +171,17 @@
   "AAACAAAAnAAAAAYAAAABAAAArAAAAAEgAAABAAAAzAAAAAIgAAAFAAAA5AAAAAMgAAABAAAAEAEA"
   "AAAgAAABAAAAFQEAAAAQAAABAAAAIAEAAA==";
 
+static const char kRawDexDebugInfoLocalNullType[] =
+    "ZGV4CjAzNQA+Kwj2g6OZMH88OvK9Ey6ycdIsFCt18ED8AQAAcAAAAHhWNBIAAAAAAAAAAHQBAAAI"
+    "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAMAQAA8AAAABwB"
+    "AAAkAQAALAEAAC8BAAA0AQAASAEAAEsBAABOAQAAAgAAAAMAAAAEAAAABQAAAAIAAAAAAAAAAAAA"
+    "AAUAAAADAAAAAAAAAAEAAQAAAAAAAQAAAAYAAAACAAEAAAAAAAEAAAABAAAAAgAAAAAAAAABAAAA"
+    "AAAAAGMBAAAAAAAAAQABAAEAAABUAQAABAAAAHAQAgAAAA4AAgABAAAAAABZAQAAAgAAABIQDwAG"
+    "PGluaXQ+AAZBLmphdmEAAUkAA0xBOwASTGphdmEvbGFuZy9PYmplY3Q7AAFWAAFhAAR0aGlzAAEA"
+    "Bw4AAwAHDh4DAAcAAAAAAQEAgYAE8AEBAIgCAAAACwAAAAAAAAABAAAAAAAAAAEAAAAIAAAAcAAA"
+    "AAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAAuAAAAAYAAAABAAAA0AAAAAEgAAACAAAA"
+    "8AAAAAIgAAAIAAAAHAEAAAMgAAACAAAAVAEAAAAgAAABAAAAYwEAAAAQAAABAAAAdAEAAA==";
+
 static void DecodeAndWriteDexFile(const char* base64, const char* location) {
   // decode base64
   CHECK(base64 != nullptr);
@@ -598,4 +609,17 @@
   EXPECT_EQ(raw->StringByTypeIdx(idx), nullptr);
 }
 
+static void Callback(void* context ATTRIBUTE_UNUSED,
+                     const DexFile::LocalInfo& entry ATTRIBUTE_UNUSED) {
+}
+
+TEST_F(DexFileTest, OpenDexDebugInfoLocalNullType) {
+  ScratchFile tmp;
+  std::unique_ptr<const DexFile> raw = OpenDexFileInMemoryBase64(
+      kRawDexDebugInfoLocalNullType, tmp.GetFilename().c_str(), 0xf25f2b38U, true);
+  const DexFile::ClassDef& class_def = raw->GetClassDef(0);
+  const DexFile::CodeItem* code_item = raw->GetCodeItem(raw->FindCodeItemOffset(class_def, 1));
+  ASSERT_TRUE(raw->DecodeDebugLocalInfo(code_item, true, 1, Callback, nullptr));
+}
+
 }  // namespace art
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 11b3cd0..c18ab47 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -922,12 +922,12 @@
   return true;
 }
 
-bool DexFileVerifier::FindClassFlags(uint32_t index,
-                                     bool is_field,
-                                     dex::TypeIndex* class_type_index,
-                                     uint32_t* class_access_flags) {
+bool DexFileVerifier::FindClassIndexAndDef(uint32_t index,
+                                           bool is_field,
+                                           dex::TypeIndex* class_type_index,
+                                           const DexFile::ClassDef** output_class_def) {
   DCHECK(class_type_index != nullptr);
-  DCHECK(class_access_flags != nullptr);
+  DCHECK(output_class_def != nullptr);
 
   // First check if the index is valid.
   if (index >= (is_field ? header_->field_ids_size_ : header_->method_ids_size_)) {
@@ -957,7 +957,7 @@
   for (size_t i = 0; i < header_->class_defs_size_; ++i) {
     const DexFile::ClassDef* class_def = class_def_begin + i;
     if (class_def->class_idx_ == *class_type_index) {
-      *class_access_flags = class_def->access_flags_;
+      *output_class_def = class_def;
       return true;
     }
   }
@@ -966,13 +966,13 @@
   return false;
 }
 
-bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field,
-                                                 const char* type_descr,
-                                                 uint32_t curr_index,
-                                                 uint32_t prev_index,
-                                                 bool* have_class,
-                                                 dex::TypeIndex* class_type_index,
-                                                 uint32_t* class_access_flags) {
+bool DexFileVerifier::CheckOrderAndGetClassDef(bool is_field,
+                                               const char* type_descr,
+                                               uint32_t curr_index,
+                                               uint32_t prev_index,
+                                               bool* have_class,
+                                               dex::TypeIndex* class_type_index,
+                                               const DexFile::ClassDef** class_def) {
   if (curr_index < prev_index) {
     ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32,
                       type_descr,
@@ -982,7 +982,7 @@
   }
 
   if (!*have_class) {
-    *have_class = FindClassFlags(curr_index, is_field, class_type_index, class_access_flags);
+    *have_class = FindClassIndexAndDef(curr_index, is_field, class_type_index, class_def);
     if (!*have_class) {
       // Should have really found one.
       ErrorStringPrintf("could not find declaring class for %s index %" PRIu32,
@@ -994,34 +994,130 @@
   return true;
 }
 
+bool DexFileVerifier::CheckStaticFieldTypes(const DexFile::ClassDef* class_def) {
+  if (class_def == nullptr) {
+    return true;
+  }
+
+  ClassDataItemIterator field_it(*dex_file_, ptr_);
+  EncodedStaticFieldValueIterator array_it(*dex_file_, *class_def);
+
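+  // Walk the static fields and their encoded initial values in lock step. The value array may be
+  // shorter than the field list (trailing fields keep default values) but must not be longer.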
+  for (; field_it.HasNextStaticField() && array_it.HasNext(); field_it.Next(), array_it.Next()) {
+    uint32_t index = field_it.GetMemberIndex();
+    const DexFile::TypeId& type_id = dex_file_->GetTypeId(dex_file_->GetFieldId(index).type_idx_);
+    const char* field_type_name =
+        dex_file_->GetStringData(dex_file_->GetStringId(type_id.descriptor_idx_));
+    Primitive::Type field_type = Primitive::GetType(field_type_name[0]);
+    EncodedArrayValueIterator::ValueType array_type = array_it.GetValueType();
+    // Ensure this matches RuntimeEncodedStaticFieldValueIterator.
+    switch (array_type) {
+      case EncodedArrayValueIterator::ValueType::kBoolean:
+        if (field_type != Primitive::kPrimBoolean) {
+          ErrorStringPrintf("unexpected static field initial value type: 'Z' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kByte:
+        if (field_type != Primitive::kPrimByte) {
+          ErrorStringPrintf("unexpected static field initial value type: 'B' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kShort:
+        if (field_type != Primitive::kPrimShort) {
+          ErrorStringPrintf("unexpected static field initial value type: 'S' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kChar:
+        if (field_type != Primitive::kPrimChar) {
+          ErrorStringPrintf("unexpected static field initial value type: 'C' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kInt:
+        if (field_type != Primitive::kPrimInt) {
+          ErrorStringPrintf("unexpected static field initial value type: 'I' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kLong:
+        if (field_type != Primitive::kPrimLong) {
+          ErrorStringPrintf("unexpected static field initial value type: 'J' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kFloat:
+        if (field_type != Primitive::kPrimFloat) {
+          ErrorStringPrintf("unexpected static field initial value type: 'F' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kDouble:
+        if (field_type != Primitive::kPrimDouble) {
+          ErrorStringPrintf("unexpected static field initial value type: 'D' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kNull:
+      case EncodedArrayValueIterator::ValueType::kString:
+      case EncodedArrayValueIterator::ValueType::kType:
+        if (field_type != Primitive::kPrimNot) {
+          ErrorStringPrintf("unexpected static field initial value type: 'L' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      default:
+        ErrorStringPrintf("unexpected static field initial value type: %x", array_type);
+        return false;
+    }
+  }
+
+  if (array_it.HasNext()) {
+    ErrorStringPrintf("too many static field initial values");
+    return false;
+  }
+  return true;
+}
+
 template <bool kStatic>
 bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                                     bool* have_class,
                                                     dex::TypeIndex* class_type_index,
-                                                    uint32_t* class_access_flags) {
+                                                    const DexFile::ClassDef** class_def) {
   DCHECK(it != nullptr);
   // These calls use the raw access flags to check whether the whole dex field is valid.
   uint32_t prev_index = 0;
   for (; kStatic ? it->HasNextStaticField() : it->HasNextInstanceField(); it->Next()) {
     uint32_t curr_index = it->GetMemberIndex();
-    if (!CheckOrderAndGetClassFlags(true,
-                                    kStatic ? "static field" : "instance field",
-                                    curr_index,
-                                    prev_index,
-                                    have_class,
-                                    class_type_index,
-                                    class_access_flags)) {
+    if (!CheckOrderAndGetClassDef(true,
+                                  kStatic ? "static field" : "instance field",
+                                  curr_index,
+                                  prev_index,
+                                  have_class,
+                                  class_type_index,
+                                  class_def)) {
       return false;
     }
-    prev_index = curr_index;
-
+    DCHECK(*class_def != nullptr);
     if (!CheckClassDataItemField(curr_index,
                                  it->GetRawMemberAccessFlags(),
-                                 *class_access_flags,
+                                 (*class_def)->access_flags_,
                                  *class_type_index,
                                  kStatic)) {
       return false;
     }
+
+    prev_index = curr_index;
   }
 
   return true;
@@ -1033,30 +1129,31 @@
     std::unordered_set<uint32_t>* direct_method_indexes,
     bool* have_class,
     dex::TypeIndex* class_type_index,
-    uint32_t* class_access_flags) {
+    const DexFile::ClassDef** class_def) {
   uint32_t prev_index = 0;
   for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) {
     uint32_t curr_index = it->GetMemberIndex();
-    if (!CheckOrderAndGetClassFlags(false,
-                                    kDirect ? "direct method" : "virtual method",
-                                    curr_index,
-                                    prev_index,
-                                    have_class,
-                                    class_type_index,
-                                    class_access_flags)) {
+    if (!CheckOrderAndGetClassDef(false,
+                                  kDirect ? "direct method" : "virtual method",
+                                  curr_index,
+                                  prev_index,
+                                  have_class,
+                                  class_type_index,
+                                  class_def)) {
       return false;
     }
-    prev_index = curr_index;
-
+    DCHECK(*class_def != nullptr);
     if (!CheckClassDataItemMethod(curr_index,
                                   it->GetRawMemberAccessFlags(),
-                                  *class_access_flags,
+                                  (*class_def)->access_flags_,
                                   *class_type_index,
                                   it->GetMethodCodeItemOffset(),
                                   direct_method_indexes,
                                   kDirect)) {
       return false;
     }
+
+    prev_index = curr_index;
   }
 
   return true;
@@ -1071,19 +1168,19 @@
   // as the lookup is expensive, cache the result.
   bool have_class = false;
   dex::TypeIndex class_type_index;
-  uint32_t class_access_flags;
+  const DexFile::ClassDef* class_def = nullptr;
 
   // Check fields.
   if (!CheckIntraClassDataItemFields<true>(&it,
                                            &have_class,
                                            &class_type_index,
-                                           &class_access_flags)) {
+                                           &class_def)) {
     return false;
   }
   if (!CheckIntraClassDataItemFields<false>(&it,
                                             &have_class,
                                             &class_type_index,
-                                            &class_access_flags)) {
+                                            &class_def)) {
     return false;
   }
 
@@ -1092,18 +1189,25 @@
                                             &direct_method_indexes,
                                             &have_class,
                                             &class_type_index,
-                                            &class_access_flags)) {
+                                            &class_def)) {
     return false;
   }
   if (!CheckIntraClassDataItemMethods<false>(&it,
                                              &direct_method_indexes,
                                              &have_class,
                                              &class_type_index,
-                                             &class_access_flags)) {
+                                             &class_def)) {
     return false;
   }
 
-  ptr_ = it.EndDataPointer();
+  const uint8_t* end_ptr = it.EndDataPointer();
+
+  // Check static field types against the initial values in the encoded array. This re-reads the
+  // class data from ptr_, so only advance ptr_ afterwards.
+  if (!CheckStaticFieldTypes(class_def)) {
+    return false;
+  }
+
+  ptr_ = end_ptr;
   return true;
 }
 
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 71b316c..d1043c6 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -86,13 +86,14 @@
                                 uint32_t code_offset,
                                 std::unordered_set<uint32_t>* direct_method_indexes,
                                 bool expect_direct);
-  bool CheckOrderAndGetClassFlags(bool is_field,
-                                  const char* type_descr,
-                                  uint32_t curr_index,
-                                  uint32_t prev_index,
-                                  bool* have_class,
-                                  dex::TypeIndex* class_type_index,
-                                  uint32_t* class_access_flags);
+  bool CheckOrderAndGetClassDef(bool is_field,
+                                const char* type_descr,
+                                uint32_t curr_index,
+                                uint32_t prev_index,
+                                bool* have_class,
+                                dex::TypeIndex* class_type_index,
+                                const DexFile::ClassDef** class_def);
+  bool CheckStaticFieldTypes(const DexFile::ClassDef* class_def);
 
   bool CheckPadding(size_t offset, uint32_t aligned_offset);
   bool CheckEncodedValue();
@@ -106,7 +107,7 @@
   bool CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                      bool* have_class,
                                      dex::TypeIndex* class_type_index,
-                                     uint32_t* class_access_flags);
+                                     const DexFile::ClassDef** class_def);
   // Check all methods of the given type from the given iterator. Load the class data from the first
   // method, if necessary (and return it), or use the given values.
   template <bool kDirect>
@@ -114,7 +115,7 @@
                                       std::unordered_set<uint32_t>* direct_method_indexes,
                                       bool* have_class,
                                       dex::TypeIndex* class_type_index,
-                                      uint32_t* class_access_flags);
+                                      const DexFile::ClassDef** class_def);
 
   bool CheckIntraCodeItem();
   bool CheckIntraStringDataItem();
@@ -165,16 +166,15 @@
       __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR;
   bool FailureReasonIsSet() const { return failure_reason_.size() != 0; }
 
-  // Retrieve class index and class access flag from the given member. index is the member index,
-  // which is taken as either a field or a method index (as designated by is_field). The result,
-  // if the member and declaring class could be found, is stored in class_type_index and
-  // class_access_flags.
-  // This is an expensive lookup, as we have to find the class-def by type index, which is a
+  // Retrieve class index and class def from the given member. index is the member index, which is
+  // taken as either a field or a method index (as designated by is_field). The result, if the
+  // member and declaring class could be found, is stored in class_type_index and class_def.
+  // This is an expensive lookup, as we have to find the class def by type index, which is a
   // linear search. The output values should thus be cached by the caller.
-  bool FindClassFlags(uint32_t index,
-                      bool is_field,
-                      dex::TypeIndex* class_type_index,
-                      uint32_t* class_access_flags);
+  bool FindClassIndexAndDef(uint32_t index,
+                            bool is_field,
+                            dex::TypeIndex* class_type_index,
+                            const DexFile::ClassDef** output_class_def);
 
   // Check validity of the given access flags, interpreted for a field in the context of a class
   // with the given second access flags.
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 7736f3d..068e122 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -123,7 +123,7 @@
 // To generate a base64 encoded Dex file (such as kGoodTestDex, below)
 // from Smali files, use:
 //
-//   smali -o classes.dex class1.smali [class2.smali ...]
+//   smali assemble -o classes.dex class1.smali [class2.smali ...]
 //   base64 classes.dex >classes.dex.base64
 
 // For reference.
@@ -1461,7 +1461,7 @@
 
 // To generate a base64 encoded Dex file version 037 from Smali files, use:
 //
-//   smali --api-level 24 -o classes.dex class1.smali [class2.smali ...]
+//   smali assemble --api 24 -o classes.dex class1.smali [class2.smali ...]
 //   base64 classes.dex >classes.dex.base64
 
 // Dex file version 037 generated from:
@@ -2090,4 +2090,105 @@
   }
 }
 
+TEST_F(DexFileVerifierTest, BadStaticFieldInitialValuesArray) {
+  // Generated DEX file version (037) from:
+  //
+  // .class public LBadStaticFieldInitialValuesArray;
+  // .super Ljava/lang/Object;
+  //
+  //  # static fields
+  //  .field static final c:C = 'c'
+  //  .field static final i:I = 0x1
+  //  .field static final s:Ljava/lang/String; = "s"
+  //
+  //  # direct methods
+  //  .method public constructor <init>()V
+  //      .registers 1
+  //      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  //      return-void
+  //  .end method
+  //
+  // Output file was hex edited so that static field "i" has string typing in initial values array.
+  static const char kDexBase64[] =
+      "ZGV4CjAzNQBrMi4cCPcMvvXNRw0uI6RRubwMPwgEYXIsAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAL"
+      "AAAAcAAAAAYAAACcAAAAAQAAALQAAAADAAAAwAAAAAIAAADYAAAAAQAAAOgAAAAkAQAACAEAACAB"
+      "AAAoAQAAMAEAADMBAAA2AQAAOwEAAE8BAABjAQAAZgEAAGkBAABsAQAAAgAAAAMAAAAEAAAABQAA"
+      "AAYAAAAHAAAABwAAAAUAAAAAAAAAAgAAAAgAAAACAAEACQAAAAIABAAKAAAAAgAAAAAAAAADAAAA"
+      "AAAAAAIAAAABAAAAAwAAAAAAAAABAAAAAAAAAHsBAAB0AQAAAQABAAEAAABvAQAABAAAAHAQAQAA"
+      "AA4ABjxpbml0PgAGQS5qYXZhAAFDAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEv"
+      "bGFuZy9TdHJpbmc7AAFWAAFjAAFpAAFzAAEABw4AAwNjFwoXCgMAAQAAGAEYARgAgYAEiAIADQAA"
+      "AAAAAAABAAAAAAAAAAEAAAALAAAAcAAAAAIAAAAGAAAAnAAAAAMAAAABAAAAtAAAAAQAAAADAAAA"
+      "wAAAAAUAAAACAAAA2AAAAAYAAAABAAAA6AAAAAEgAAABAAAACAEAAAIgAAALAAAAIAEAAAMgAAAB"
+      "AAAAbwEAAAUgAAABAAAAdAEAAAAgAAABAAAAewEAAAAQAAABAAAAjAEAAA==";
+
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length));
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+  EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(),
+                                       dex_file->Begin(),
+                                       dex_file->Size(),
+                                       "bad static field initial values array",
+                                       /*verify_checksum*/ true,
+                                       &error_msg));
+}
+
+TEST_F(DexFileVerifierTest, GoodStaticFieldInitialValuesArray) {
+  // Generated DEX file version (037) from:
+  //
+  //  .class public LGoodStaticFieldInitialValuesArray;
+  //  .super Ljava/lang/Object;
+  //
+  //  # static fields
+  //  .field static final b:B = 0x1t
+  //  .field static final c:C = 'c'
+  //  .field static final d:D = 0.6
+  //  .field static final f:F = 0.5f
+  //  .field static final i:I = 0x3
+  //  .field static final j:J = 0x4L
+  //  .field static final l1:Ljava/lang/String;
+  //  .field static final l2:Ljava/lang/String; = "s"
+  //  .field static final l3:Ljava/lang/Class; = Ljava/lang/String;
+  //  .field static final s:S = 0x2s
+  //  .field static final z:Z = true
+  //
+  //  # direct methods
+  //  .method public constructor <init>()V
+  //      .registers 1
+  //      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  //      return-void
+  //  .end method
+  static const char kDexBase64[] =
+      "ZGV4CjAzNQAwWxLbdhFa1NGiFWjsy5fhUCHxe5QHtPY8AwAAcAAAAHhWNBIAAAAAAAAAAJwCAAAZ"
+      "AAAAcAAAAA0AAADUAAAAAQAAAAgBAAALAAAAFAEAAAIAAABsAQAAAQAAAHwBAACgAQAAnAEAAJwB"
+      "AACkAQAApwEAAKoBAACtAQAAsAEAALMBAAC2AQAA2wEAAO4BAAACAgAAFgIAABkCAAAcAgAAHwIA"
+      "ACICAAAlAgAAKAIAACsCAAAuAgAAMQIAADUCAAA5AgAAPQIAAEACAAABAAAAAgAAAAMAAAAEAAAA"
+      "BQAAAAYAAAAHAAAACAAAAAkAAAAKAAAACwAAAAwAAAANAAAADAAAAAsAAAAAAAAABgAAAA4AAAAG"
+      "AAEADwAAAAYAAgAQAAAABgADABEAAAAGAAQAEgAAAAYABQATAAAABgAJABQAAAAGAAkAFQAAAAYA"
+      "BwAWAAAABgAKABcAAAAGAAwAGAAAAAYAAAAAAAAACAAAAAAAAAAGAAAAAQAAAAgAAAAAAAAA////"
+      "/wAAAAB8AgAARAIAAAY8aW5pdD4AAUIAAUMAAUQAAUYAAUkAAUoAI0xHb29kU3RhdGljRmllbGRJ"
+      "bml0aWFsVmFsdWVzQXJyYXk7ABFMamF2YS9sYW5nL0NsYXNzOwASTGphdmEvbGFuZy9PYmplY3Q7"
+      "ABJMamF2YS9sYW5nL1N0cmluZzsAAVMAAVYAAVoAAWIAAWMAAWQAAWYAAWkAAWoAAmwxAAJsMgAC"
+      "bDMAAXMAAXoAAAsAAQNj8TMzMzMzM+M/ED8EAwYEHhcXGAkCAj8AAAAAAQABAAEAAAAAAAAABAAA"
+      "AHAQAQAAAA4ACwABAAAYARgBGAEYARgBGAEYARgBGAEYARgAgYAE5AQNAAAAAAAAAAEAAAAAAAAA"
+      "AQAAABkAAABwAAAAAgAAAA0AAADUAAAAAwAAAAEAAAAIAQAABAAAAAsAAAAUAQAABQAAAAIAAABs"
+      "AQAABgAAAAEAAAB8AQAAAiAAABkAAACcAQAABSAAAAEAAABEAgAAAxAAAAEAAABgAgAAASAAAAEA"
+      "AABkAgAAACAAAAEAAAB8AgAAABAAAAEAAACcAgAA";
+
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length));
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good static field initial values array",
+                                      /*verify_checksum*/ true,
+                                      &error_msg));
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 091085a..9f34c12 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -515,6 +515,30 @@
   return os.str();
 }
 
+// Add some checks that ensure the flags make sense. We need a subclass so that we are in the
+// context of Instruction; otherwise the flag constants from the instruction list are not in scope.
+struct InstructionStaticAsserts : private Instruction {
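+  // Logical implication: IMPLIES(a, b) holds unless a is true and b is false.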
+  #define IMPLIES(a, b) (!(a) || (b))
+
+  #define VAR_ARGS_CHECK(o, c, pname, f, i, a, v) \
+    static_assert(IMPLIES((f) == k35c || (f) == k45cc, \
+                          ((v) & (kVerifyVarArg | kVerifyVarArgNonZero)) != 0), \
+                  "Missing var-arg verification");
+  #include "dex_instruction_list.h"
+    DEX_INSTRUCTION_LIST(VAR_ARGS_CHECK)
+  #undef DEX_INSTRUCTION_LIST
+  #undef VAR_ARGS_CHECK
+
+  #define VAR_ARGS_RANGE_CHECK(o, c, pname, f, i, a, v) \
+    static_assert(IMPLIES((f) == k3rc || (f) == k4rcc, \
+                          ((v) & (kVerifyVarArgRange | kVerifyVarArgRangeNonZero)) != 0), \
+                  "Missing var-arg verification");
+  #include "dex_instruction_list.h"
+    DEX_INSTRUCTION_LIST(VAR_ARGS_RANGE_CHECK)
+  #undef DEX_INSTRUCTION_LIST
+  #undef VAR_ARGS_RANGE_CHECK
+};
+
 std::ostream& operator<<(std::ostream& os, const Instruction::Code& code) {
   return os << Instruction::Name(code);
 }
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index a5ce3c2..11dc7e2 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -271,8 +271,8 @@
   V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \
   V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
-  V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite) \
-  V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite) \
+  V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArg) \
+  V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArgRange) \
   V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError)
 
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/runtime/dex_to_dex_decompiler.cc
similarity index 91%
rename from compiler/dex/dex_to_dex_decompiler.cc
rename to runtime/dex_to_dex_decompiler.cc
index 85d5784..c15c9ec 100644
--- a/compiler/dex/dex_to_dex_decompiler.cc
+++ b/runtime/dex_to_dex_decompiler.cc
@@ -32,6 +32,7 @@
                 bool decompile_return_instruction)
     : code_item_(code_item),
       quickened_info_ptr_(quickened_info.data()),
+      quickened_info_start_(quickened_info.data()),
       quickened_info_end_(quickened_info.data() + quickened_info.size()),
       decompile_return_instruction_(decompile_return_instruction) {}
 
@@ -89,6 +90,7 @@
 
   const DexFile::CodeItem& code_item_;
   const uint8_t* quickened_info_ptr_;
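+  // Start of the quickening info, kept to tell "nothing consumed" apart from "partially consumed".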
+  const uint8_t* const quickened_info_start_;
   const uint8_t* const quickened_info_end_;
   const bool decompile_return_instruction_;
 
@@ -185,10 +187,15 @@
   }
 
   if (quickened_info_ptr_ != quickened_info_end_) {
-    LOG(FATAL) << "Failed to use all values in quickening info."
-               << " Actual: " << std::hex << quickened_info_ptr_
-               << " Expected: " << quickened_info_end_;
-    return false;
+    if (quickened_info_start_ == quickened_info_ptr_) {
+      LOG(WARNING) << "Failed to use any value in quickening info,"
+                   << " potentially due to duplicate methods.";
+    } else {
+      LOG(FATAL) << "Failed to use all values in quickening info."
+                 << " Actual: " << std::hex << reinterpret_cast<uintptr_t>(quickened_info_ptr_)
+                 << " Expected: " << reinterpret_cast<uintptr_t>(quickened_info_end_);
+      return false;
+    }
   }
 
   return true;
diff --git a/compiler/dex/dex_to_dex_decompiler.h b/runtime/dex_to_dex_decompiler.h
similarity index 88%
rename from compiler/dex/dex_to_dex_decompiler.h
rename to runtime/dex_to_dex_decompiler.h
index b5d5b91..d7cb164 100644
--- a/compiler/dex/dex_to_dex_decompiler.h
+++ b/runtime/dex_to_dex_decompiler.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
-#define ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
+#ifndef ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_
+#define ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_
 
 #include "base/array_ref.h"
 #include "dex_file.h"
@@ -36,4 +36,4 @@
 }  // namespace optimizer
 }  // namespace art
 
-#endif  // ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
+#endif  // ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_
diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc
index db65e40..3c8243a 100644
--- a/runtime/dexopt_test.cc
+++ b/runtime/dexopt_test.cc
@@ -45,18 +45,23 @@
 }
 
 void DexoptTest::GenerateOatForTest(const std::string& dex_location,
-                        const std::string& oat_location,
-                        CompilerFilter::Filter filter,
-                        bool relocate,
-                        bool pic,
-                        bool with_alternate_image) {
+                                    const std::string& oat_location_in,
+                                    CompilerFilter::Filter filter,
+                                    bool relocate,
+                                    bool pic,
+                                    bool with_alternate_image) {
   std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA));
   std::string dalvik_cache_tmp = dalvik_cache + ".redirected";
-
+  std::string oat_location = oat_location_in;
   if (!relocate) {
     // Temporarily redirect the dalvik cache so dex2oat doesn't find the
     // relocated image file.
     ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno);
+    // If the oat location is in dalvik cache, replace the cache path with the temporary one.
+    size_t pos = oat_location.find(dalvik_cache);
+    if (pos != std::string::npos) {
+      oat_location = oat_location.replace(pos, dalvik_cache.length(), dalvik_cache_tmp);
+    }
   }
 
   std::vector<std::string> args;
@@ -90,6 +95,7 @@
   if (!relocate) {
     // Restore the dalvik cache if needed.
     ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno);
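+    // The cache path is back in place, so verify against the original oat location below.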
+    oat_location = oat_location_in;
   }
 
   // Verify the odex file was generated as expected.
@@ -122,7 +128,7 @@
   }
 
   if (!with_alternate_image) {
-    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
+    if (CompilerFilter::IsAotCompilationEnabled(filter)) {
       if (relocate) {
         EXPECT_EQ(reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()),
             oat_header.GetImageFileLocationOatDataBegin());
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 355d7b3..6b96567 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -58,18 +58,13 @@
   }
 }
 
-constexpr Runtime::CalleeSaveType kInitEntrypointSaveType =
-    // TODO: Change allocation entrypoints on MIPS and MIPS64 to kSaveEverything.
-    (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
-                                                     : Runtime::kSaveEverything;
-
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
@@ -83,7 +78,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
@@ -98,7 +93,7 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true);
@@ -111,7 +106,7 @@
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr)) {
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 68ef15d..c3dd21f 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -47,10 +47,11 @@
 // WriteBarrier, and from there to here.
 class CardTable {
  public:
-  static constexpr size_t kCardShift = 7;
+  static constexpr size_t kCardShift = 10;
   static constexpr size_t kCardSize = 1 << kCardShift;
   static constexpr uint8_t kCardClean = 0x0;
   static constexpr uint8_t kCardDirty = 0x70;
+  static constexpr uint8_t kCardAged = kCardDirty - 1;
 
   static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity);
   ~CardTable();
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 34e30c1..c416b9c 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -391,7 +391,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     live_bitmap->VisitMarkedRange(start,
                                   end,
-                                  [this, callback, arg](mirror::Object* obj) {
+                                  [callback, arg](mirror::Object* obj) {
       callback(obj, arg);
     });
   }
@@ -402,7 +402,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     live_bitmap->VisitMarkedRange(start,
                                   end,
-                                  [this, callback, arg](mirror::Object* obj) {
+                                  [callback, arg](mirror::Object* obj) {
       callback(obj, arg);
     });
   }
@@ -560,7 +560,7 @@
             << start << " " << *space_;
         space_->GetLiveBitmap()->VisitMarkedRange(start,
                                                   start + CardTable::kCardSize,
-                                                  [this, callback, arg](mirror::Object* obj) {
+                                                  [callback, arg](mirror::Object* obj) {
           callback(obj, arg);
         });
       });
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index dd449f9..3503973 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -130,7 +130,7 @@
       mirror::Object* to_ref = GetFwdPtr(from_ref);
       if (to_ref == nullptr) {
         // It isn't marked yet. Mark it by copying it to the to-space.
-        to_ref = Copy(from_ref);
+        to_ref = Copy(from_ref, holder, offset);
       }
       DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
           << "from_ref=" << from_ref << " to_ref=" << to_ref;
@@ -152,7 +152,8 @@
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
   mirror::Object* ret;
-  if (from_ref == nullptr) {
+  // We can get here before marking starts since we gray immune objects before the marking phase.
+  if (from_ref == nullptr || !Thread::Current()->GetIsGcMarking()) {
     return from_ref;
   }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 4192f34..a450a75 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -77,6 +77,7 @@
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false),
+      is_using_read_barrier_entrypoints_(false),
       is_active_(false),
       is_asserting_to_space_invariant_(false),
       region_space_bitmap_(nullptr),
@@ -163,6 +164,15 @@
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     InitializePhase();
   }
+  if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+    // Switch to read barrier mark entrypoints before we gray the objects. This is required in case
+    // a mutator sees a gray bit and dispatches on the entrypoint (b/37876887).
+    ActivateReadBarrierEntrypoints();
+    // Gray dirty immune objects concurrently to reduce GC pause times. We re-process gray cards in
+    // the pause.
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    GrayAllDirtyImmuneObjects();
+  }
   FlipThreadRoots();
   {
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
@@ -192,6 +202,59 @@
   thread_running_gc_ = nullptr;
 }
 
+class ConcurrentCopying::ActivateReadBarrierEntrypointsCheckpoint : public Closure {
+ public:
+  explicit ActivateReadBarrierEntrypointsCheckpoint(ConcurrentCopying* concurrent_copying)
+      : concurrent_copying_(concurrent_copying) {}
+
+  void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    // Note: self is not necessarily equal to thread since thread may be suspended.
+    Thread* self = Thread::Current();
+    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+        << thread->GetState() << " thread " << thread << " self " << self;
+    // Switch to the read barrier entrypoints.
+    thread->SetReadBarrierEntrypoints();
+    // If thread is a running mutator, then act on behalf of the garbage collector.
+    // See the code in ThreadList::RunCheckpoint.
+    concurrent_copying_->GetBarrier().Pass(self);
+  }
+
+ private:
+  ConcurrentCopying* const concurrent_copying_;
+};
+
+class ConcurrentCopying::ActivateReadBarrierEntrypointsCallback : public Closure {
+ public:
+  explicit ActivateReadBarrierEntrypointsCallback(ConcurrentCopying* concurrent_copying)
+      : concurrent_copying_(concurrent_copying) {}
+
+  void Run(Thread* self ATTRIBUTE_UNUSED) OVERRIDE REQUIRES(Locks::thread_list_lock_) {
+    // This needs to run under the thread_list_lock_ critical section in ThreadList::RunCheckpoint()
+    // to avoid a race with ThreadList::Register().
+    CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_);
+    concurrent_copying_->is_using_read_barrier_entrypoints_ = true;
+  }
+
+ private:
+  ConcurrentCopying* const concurrent_copying_;
+};
+
+void ConcurrentCopying::ActivateReadBarrierEntrypoints() {
+  Thread* const self = Thread::Current();
+  ActivateReadBarrierEntrypointsCheckpoint checkpoint(this);
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  gc_barrier_->Init(self, 0);
+  ActivateReadBarrierEntrypointsCallback callback(this);
+  const size_t barrier_count = thread_list->RunCheckpoint(&checkpoint, &callback);
+  // If there are no threads to wait for, all of the checkpoint functions have already run and
+  // there is no need to wait on the barrier.
+  if (barrier_count == 0) {
+    return;
+  }
+  ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+  gc_barrier_->Increment(self, barrier_count);
+}
+
 void ConcurrentCopying::BindBitmaps() {
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
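ActivateReadBarrierEntrypoints() above follows ART's standard checkpoint shape: run a closure on every thread, then block until every thread that could not run it inline has reported back through the barrier. A simplified single-file model of that rendezvous, using std::thread stand-ins rather than ART's Barrier/ThreadList API:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// Completion barrier: workers call Pass(), the requester waits for all passes.
class CompletionBarrier {
 public:
  void Init(int count) { remaining_ = count; }  // called before workers start
  void Pass() {
    std::lock_guard<std::mutex> lock(mu_);
    if (--remaining_ == 0) cv_.notify_all();
  }
  void Wait() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return remaining_ == 0; });
  }
 private:
  std::mutex mu_;
  std::condition_variable cv_;
  int remaining_ = 0;
};

int main() {
  constexpr int kThreads = 4;
  CompletionBarrier barrier;
  barrier.Init(kThreads);
  std::vector<std::thread> threads;
  for (int i = 0; i < kThreads; ++i) {
    threads.emplace_back([&barrier, i] {
      std::printf("thread %d: switching entrypoints\n", i);  // the checkpoint work
      barrier.Pass();  // report back, like GetBarrier().Pass(self)
    });
  }
  barrier.Wait();  // roughly analogous to gc_barrier_->Increment(self, barrier_count)
  std::printf("all threads switched\n");
  for (auto& t : threads) t.join();
  return 0;
}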
@@ -352,9 +415,12 @@
     if (kVerifyNoMissingCardMarks) {
       cc->VerifyNoMissingCardMarks();
     }
-    CHECK(thread == self);
+    CHECK_EQ(thread, self);
     Locks::mutator_lock_->AssertExclusiveHeld(self);
-    cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
+    {
+      TimingLogger::ScopedTiming split2("(Paused)SetFromSpace", cc->GetTimings());
+      cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
+    }
     cc->SwapStacks();
     if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
       cc->RecordLiveStackFreezeSize(self);
@@ -368,11 +434,11 @@
     }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
-      TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
+      TimingLogger::ScopedTiming split3("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      cc->GrayAllDirtyImmuneObjects();
+      cc->GrayAllNewlyDirtyImmuneObjects();
       if (kIsDebugBuild) {
        // Check that all non-gray immune objects only reference immune objects.
         cc->VerifyGrayImmuneObjects();
@@ -519,8 +585,8 @@
 
 void ConcurrentCopying::VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg) {
   auto* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-  // Objects not on dirty cards should never have references to newly allocated regions.
-  if (!collector->heap_->GetCardTable()->IsDirty(obj)) {
+  // Objects not on dirty or aged cards should never have references to newly allocated regions.
+  if (collector->heap_->GetCardTable()->GetCard(obj) == gc::accounting::CardTable::kCardClean) {
     VerifyNoMissingCardMarkVisitor visitor(collector, /*holder*/ obj);
     obj->VisitReferences</*kVisitNativeRoots*/true, kVerifyNone, kWithoutReadBarrier>(
         visitor,
@@ -583,53 +649,100 @@
   }
 }
 
+template <bool kConcurrent>
 class ConcurrentCopying::GrayImmuneObjectVisitor {
  public:
-  explicit GrayImmuneObjectVisitor() {}
+  explicit GrayImmuneObjectVisitor(Thread* self) : self_(self) {}
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (kUseBakerReadBarrier) {
-      if (kIsDebugBuild) {
-        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+    if (kUseBakerReadBarrier && obj->GetReadBarrierState() == ReadBarrier::WhiteState()) {
+      if (kConcurrent) {
+        Locks::mutator_lock_->AssertSharedHeld(self_);
+        obj->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState());
+        // Mod union table VisitObjects may visit the same object multiple times so we can't check
+        // the result of the atomic set.
+      } else {
+        Locks::mutator_lock_->AssertExclusiveHeld(self_);
+        obj->SetReadBarrierState(ReadBarrier::GrayState());
       }
-      obj->SetReadBarrierState(ReadBarrier::GrayState());
     }
   }
 
   static void Callback(mirror::Object* obj, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) {
-    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
+    reinterpret_cast<GrayImmuneObjectVisitor<kConcurrent>*>(arg)->operator()(obj);
   }
+
+ private:
+  Thread* const self_;
 };
 
 void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
-  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
-  gc::Heap* const heap = Runtime::Current()->GetHeap();
-  accounting::CardTable* const card_table = heap->GetCardTable();
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  TimingLogger::ScopedTiming split("GrayAllDirtyImmuneObjects", GetTimings());
+  accounting::CardTable* const card_table = heap_->GetCardTable();
+  Thread* const self = Thread::Current();
+  using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ true>;
+  VisitorType visitor(self);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
     DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    GrayImmuneObjectVisitor visitor;
-    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
     // Mark all the objects on dirty cards since these may point to objects in other space.
     // Once these are marked, the GC will eventually clear them later.
     // Table is non null for boot image and zygote spaces. It is only null for application image
     // spaces.
     if (table != nullptr) {
-      // TODO: Consider adding precleaning outside the pause.
       table->ProcessCards();
-      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
-      // Since the cards are recorded in the mod-union table and this is paused, we can clear
-      // the cards for the space (to madvise).
+      table->VisitObjects(&VisitorType::Callback, &visitor);
+      // Don't clear cards here since we need to rescan in the pause. If we cleared the cards here,
+      // there would be races with the mutator marking new cards.
+    } else {
+      // Keep cards aged if we don't have a mod-union table since we may need to scan them in future
+      // GCs. This case is for app images.
+      card_table->ModifyCardsAtomic(
+          space->Begin(),
+          space->End(),
+          [](uint8_t card) {
+            return (card != gc::accounting::CardTable::kCardClean)
+                ? gc::accounting::CardTable::kCardAged
+                : card;
+          },
+          /* card modified visitor */ VoidFunctor());
+      card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(),
+                                               space->Begin(),
+                                               space->End(),
+                                               visitor,
+                                               gc::accounting::CardTable::kCardAged);
+    }
+  }
+}
+
+void ConcurrentCopying::GrayAllNewlyDirtyImmuneObjects() {
+  TimingLogger::ScopedTiming split("(Paused)GrayAllNewlyDirtyImmuneObjects", GetTimings());
+  accounting::CardTable* const card_table = heap_->GetCardTable();
+  using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ false>;
+  Thread* const self = Thread::Current();
+  VisitorType visitor(self);
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+
+    // We don't need to scan aged cards here since we already scanned them before the pause. Note
+    // that scanning the dirty cards also covers the mod-union table cards.
+    card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(),
+                                             space->Begin(),
+                                             space->End(),
+                                             visitor,
+                                             gc::accounting::CardTable::kCardDirty);
+    if (table != nullptr) {
+      // Add the cards to the mod-union table so that we can clear cards to save RAM.
+      table->ProcessCards();
       TimingLogger::ScopedTiming split2("(Paused)ClearCards", GetTimings());
       card_table->ClearCardRange(space->Begin(),
                                  AlignDown(space->End(), accounting::CardTable::kCardSize));
-    } else {
-      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
-      // pause because app image spaces are all dirty pages anyways.
-      card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
     }
   }
-  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
+  // Since all of the objects that may point to other spaces are gray, we can avoid all the read
   // barriers in the immune spaces.
   updated_all_immune_objects_.StoreRelaxed(true);
 }
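The ModifyCardsAtomic lambda above implements card "aging": any non-clean card is downgraded to the aged value, so the later pause can tell cards dirtied during the concurrent pass apart from ones already processed. A standalone sketch of the transform; the concrete byte values are assumptions modeled on CardTable's constants, not copied from card_table.h:

#include <cstdint>
#include <cstdio>

// Assumed card byte values, modeled after ART's CardTable constants.
constexpr uint8_t kCardClean = 0x0;
constexpr uint8_t kCardDirty = 0x70;
constexpr uint8_t kCardAged  = kCardDirty - 1;

// Mirrors the lambda passed to ModifyCardsAtomic: clean cards stay clean,
// everything else (dirty or previously aged) becomes aged.
uint8_t AgeCard(uint8_t card) {
  return (card != kCardClean) ? kCardAged : card;
}

int main() {
  const uint8_t cards[] = {kCardClean, kCardDirty, kCardAged};
  for (uint8_t card : cards) {
    std::printf("0x%02x -> 0x%02x\n",
                static_cast<unsigned>(card), static_cast<unsigned>(AgeCard(card)));
  }
  return 0;
}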
@@ -658,6 +771,7 @@
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      // Only need to scan gray objects.
       if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
         collector_->ScanImmuneObject(obj);
         // Done scanning the object, go back to white.
@@ -707,6 +821,7 @@
       if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) {
         table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor);
       } else {
+        // TODO: Scan only the aged cards.
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                       reinterpret_cast<uintptr_t>(space->Limit()),
                                       visitor);
@@ -876,6 +991,12 @@
     // to avoid a race with ThreadList::Register().
     CHECK(concurrent_copying_->is_marking_);
     concurrent_copying_->is_marking_ = false;
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      CHECK(concurrent_copying_->is_using_read_barrier_entrypoints_);
+      concurrent_copying_->is_using_read_barrier_entrypoints_ = false;
+    } else {
+      CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_);
+    }
   }
 
  private:
@@ -1621,25 +1742,29 @@
   Thread* const self = Thread::Current();
   WriterMutexLock rmu(self, *Locks::heap_bitmap_lock_);
   space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
-  // Pick the current live bitmap (mark bitmap if swapped).
-  accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
-  accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
-  // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
-  std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
-  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
-                                reinterpret_cast<uintptr_t>(range.second),
-                                [mark_bitmap, los, self](mirror::Object* obj)
-      REQUIRES(Locks::heap_bitmap_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (los->IsZygoteLargeObject(self, obj)) {
-      mark_bitmap->Set(obj);
-    }
-  });
+  if (los != nullptr) {
+    // Pick the current live bitmap (mark bitmap if swapped).
+    accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
+    accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
+    // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
+    std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
+                                  reinterpret_cast<uintptr_t>(range.second),
+                                  [mark_bitmap, los, self](mirror::Object* obj)
+        REQUIRES(Locks::heap_bitmap_lock_)
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      if (los->IsZygoteLargeObject(self, obj)) {
+        mark_bitmap->Set(obj);
+      }
+    });
+  }
 }
 
 void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) {
   TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings());
-  RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
+  if (heap_->GetLargeObjectsSpace() != nullptr) {
+    RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
+  }
 }
 
 void ConcurrentCopying::ReclaimPhase() {
@@ -1888,7 +2013,6 @@
         heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
     accounting::LargeObjectBitmap* los_bitmap =
         heap_mark_bitmap_->GetLargeObjectBitmap(ref);
-    CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
     bool is_los = mark_bitmap == nullptr;
     if ((!is_los && mark_bitmap->Test(ref)) ||
         (is_los && los_bitmap->Test(ref))) {
@@ -2160,8 +2284,16 @@
   return reinterpret_cast<mirror::Object*>(addr);
 }
 
-mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) {
+mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref,
+                                        mirror::Object* holder,
+                                        MemberOffset offset) {
   DCHECK(region_space_->IsInFromSpace(from_ref));
+  // If the class pointer is null, the object is invalid. This could occur for a dangling pointer
+  // from a previous GC that is either inside or outside the allocated region.
+  mirror::Class* klass = from_ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
+  if (UNLIKELY(klass == nullptr)) {
+    heap_->GetVerification()->LogHeapCorruption(holder, offset, from_ref, /* fatal */ true);
+  }
   // There must not be a read barrier to avoid nested RB that might violate the to-space invariant.
   // Note that from_ref is a from space ref so the SizeOf() call will access the from-space meta
   // objects, but it's ok and necessary.
@@ -2216,7 +2348,7 @@
   DCHECK(to_ref != nullptr);
 
   // Copy the object excluding the lock word since that is handled in the loop.
-  to_ref->SetClass(from_ref->GetClass<kVerifyNone, kWithoutReadBarrier>());
+  to_ref->SetClass(klass);
   const size_t kObjectHeaderSize = sizeof(mirror::Object);
   DCHECK_GE(obj_size, kObjectHeaderSize);
   static_assert(kObjectHeaderSize == sizeof(mirror::HeapReference<mirror::Class>) +
@@ -2384,7 +2516,6 @@
       heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
   accounting::LargeObjectBitmap* los_bitmap =
       heap_mark_bitmap_->GetLargeObjectBitmap(ref);
-  CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
   bool is_los = mark_bitmap == nullptr;
   if (!is_los && mark_bitmap->Test(ref)) {
     // Already marked.
@@ -2424,7 +2555,7 @@
       if (is_los && !IsAligned<kPageSize>(ref)) {
         // Ref is a large object that is not aligned, it must be heap corruption. Dump data before
         // AtomicSetReadBarrierState since it will fault if the address is not valid.
-        heap_->GetVerification()->LogHeapCorruption(ref, offset, holder, /* fatal */ true);
+        heap_->GetVerification()->LogHeapCorruption(holder, offset, ref, /* fatal */ true);
       }
       // Not marked or on the allocation stack. Try to mark it.
       // This may or may not succeed, which is ok.
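Note the one-line LogHeapCorruption fix above: holder and ref share a type, so transposed arguments compile silently and only show up later as confusing crash dumps. A hypothetical strong-typedef sketch (not ART's Verification API) of how such call sites can be made self-checking:

#include <cstddef>
#include <cstdio>

struct Object {};

// Hypothetical wrappers so holder and ref cannot be transposed silently.
struct Holder { const Object* obj; };
struct Ref    { const Object* obj; };

void LogHeapCorruption(Holder holder, size_t offset, Ref ref, bool fatal) {
  std::printf("corrupt ref %p at offset %zu of holder %p (fatal=%d)\n",
              static_cast<const void*>(ref.obj), offset,
              static_cast<const void*>(holder.obj), fatal);
}

int main() {
  Object holder_obj, ref_obj;
  LogHeapCorruption(Holder{&holder_obj}, 16, Ref{&ref_obj}, /*fatal=*/false);
  // LogHeapCorruption(Ref{&ref_obj}, 16, Holder{&holder_obj}, false);  // would not compile
  return 0;
}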
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index f877314..c09e0eb 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -118,6 +118,11 @@
   bool IsMarking() const {
     return is_marking_;
   }
+  // We may want to use read barrier entrypoints before is_marking_ is true since concurrent graying
+  // creates a small window where we might dispatch on these entrypoints.
+  bool IsUsingReadBarrierEntrypoints() const {
+    return is_using_read_barrier_entrypoints_;
+  }
   bool IsActive() const {
     return is_active_;
   }
@@ -133,7 +138,10 @@
  private:
   void PushOntoMarkStack(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
-  mirror::Object* Copy(mirror::Object* from_ref) REQUIRES_SHARED(Locks::mutator_lock_)
+  mirror::Object* Copy(mirror::Object* from_ref,
+                       mirror::Object* holder,
+                       MemberOffset offset)
+      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void Scan(mirror::Object* to_ref) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
@@ -162,6 +170,9 @@
   void GrayAllDirtyImmuneObjects()
       REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void GrayAllNewlyDirtyImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   void VerifyGrayImmuneObjects()
       REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
@@ -249,6 +260,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
+  // Set the read barrier mark entrypoints to non-null.
+  void ActivateReadBarrierEntrypoints();
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
@@ -265,6 +278,8 @@
       GUARDED_BY(mark_stack_lock_);
   Thread* thread_running_gc_;
   bool is_marking_;                       // True while marking is ongoing.
+  // True while we might dispatch on the read barrier entrypoints.
+  bool is_using_read_barrier_entrypoints_;
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
   ImmuneSpaces immune_spaces_;
@@ -327,6 +342,8 @@
   // ObjPtr since the GC may transition to suspended and runnable between phases.
   mirror::Class* java_lang_Object_;
 
+  class ActivateReadBarrierEntrypointsCallback;
+  class ActivateReadBarrierEntrypointsCheckpoint;
   class AssertToSpaceInvariantFieldVisitor;
   class AssertToSpaceInvariantObjectVisitor;
   class AssertToSpaceInvariantRefsVisitor;
@@ -336,7 +353,7 @@
   class DisableMarkingCheckpoint;
   class DisableWeakRefAccessCallback;
   class FlipCallback;
-  class GrayImmuneObjectVisitor;
+  template <bool kConcurrent> class GrayImmuneObjectVisitor;
   class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index cab293f..9d3d950 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -140,7 +140,7 @@
       }
     } else {
       DCHECK(!space_->HasAddress(obj));
-      auto slow_path = [this](const mirror::Object* ref)
+      auto slow_path = [](const mirror::Object* ref)
           REQUIRES_SHARED(Locks::mutator_lock_) {
         // Marking a large object, make sure its aligned as a sanity check.
         if (!IsAligned<kPageSize>(ref)) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 7da7221..298336a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -150,8 +150,13 @@
 static uint8_t* const kPreferredAllocSpaceBegin =
     reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity);
 #else
-// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+#ifdef __ANDROID__
+// For 32-bit Android, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
 static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000);
+#else
+// For 32-bit host, use 0x40000000 because asan uses most of the space below this.
+static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x40000000);
+#endif
 #endif
 
 static inline bool CareAboutPauseTimes() {
@@ -3517,7 +3522,13 @@
     // is not the heap task daemon thread, it's considered as a
     // blocking GC (i.e., blocking itself).
     running_collection_is_blocking_ = true;
-    VLOG(gc) << "Starting a blocking GC " << cause;
+    // Don't log fake "GC" types that are only used for the debugger or hidden APIs; logging them
+    // results in log spam. kGcCauseExplicit is already logged in LogGC, so avoid it here too.
+    if (cause == kGcCauseForAlloc ||
+        cause == kGcCauseForNativeAlloc ||
+        cause == kGcCauseDisableMovingGc) {
+      VLOG(gc) << "Starting a blocking GC " << cause;
+    }
   }
   return last_gc_type;
 }
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e9f0758..748d378 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -651,7 +651,8 @@
               bitmap_name,
               image_bitmap_map.release(),
               reinterpret_cast<uint8_t*>(map->Begin()),
-              image_objects.End()));
+              // Make sure the bitmap is aligned to card size instead of just bitmap word size.
+              RoundUp(image_objects.End(), gc::accounting::CardTable::kCardSize)));
       if (bitmap == nullptr) {
         *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
         return nullptr;
@@ -1695,6 +1696,29 @@
   return true;
 }
 
+ImageSpace::~ImageSpace() {
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr) {
+    return;
+  }
+
+  if (GetImageHeader().IsAppImage()) {
+    // App image spaces do not modify the resolution method in Init, so there is nothing to undo.
+    return;
+  }
+
+  if (!runtime->HasResolutionMethod()) {
+    // Another image space has already cleared the methods below.
+    return;
+  }
+
+  runtime->ClearInstructionSet();
+  runtime->ClearResolutionMethod();
+  runtime->ClearImtConflictMethod();
+  runtime->ClearImtUnimplementedMethod();
+  runtime->ClearCalleeSaveMethods();
+}
+
 std::unique_ptr<ImageSpace> ImageSpace::CreateFromAppImage(const char* image,
                                                            const OatFile* oat_file,
                                                            std::string* error_msg) {
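The bitmap-end change above rounds the bitmap's coverage up to the card size so card-aligned scans stay in bounds. For reference, the usual power-of-two round-up that art's RoundUp performs, with the card size taken from the CARD_TABLE_CARD_SHIFT value added below:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round x up to the next multiple of n, where n is a power of two.
constexpr uintptr_t RoundUp(uintptr_t x, uintptr_t n) {
  return (x + n - 1) & ~(n - 1);
}

int main() {
  constexpr uintptr_t kCardSize = uintptr_t{1} << 10;  // assumed from CARD_TABLE_CARD_SHIFT == 0xa
  assert(RoundUp(0, kCardSize) == 0);
  assert(RoundUp(1, kCardSize) == kCardSize);
  assert(RoundUp(kCardSize, kCardSize) == kCardSize);
  // Prints: RoundUp(1500, 1024) = 2048
  std::printf("RoundUp(1500, 1024) = %zu\n",
              static_cast<size_t>(RoundUp(1500, kCardSize)));
  return 0;
}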
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 199bbdd..aa3dd42 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -159,6 +159,9 @@
 
   void DumpSections(std::ostream& os) const;
 
+  // De-initialize the image space by undoing the effects of Init().
+  virtual ~ImageSpace();
+
  protected:
   // Tries to initialize an ImageSpace from the given image path, returning null on error.
   //
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index af57397..06638e7 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -78,6 +78,8 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))))
 #define STRING_DEX_CACHE_ELEMENT_SIZE 8
 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE), (static_cast<int32_t>(sizeof(art::mirror::StringDexCachePair))))
+#define CARD_TABLE_CARD_SHIFT 0xa
+DEFINE_CHECK_EQ(static_cast<size_t>(CARD_TABLE_CARD_SHIFT), (static_cast<size_t>(art::gc::accounting::CardTable::kCardShift)))
 #define MIN_LARGE_OBJECT_THRESHOLD 0x3000
 DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
 #define LOCK_WORD_STATE_SHIFT 30
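CARD_TABLE_CARD_SHIFT lets hand-written assembly share the card shift with the C++ runtime, and DEFINE_CHECK_EQ then refuses to compile if the two drift apart. A minimal standalone equivalent of that pattern:

#include <cstddef>

// Constant the "assembly side" would hardcode.
#define CARD_TABLE_CARD_SHIFT 0xa

// Constant the "C++ side" owns.
namespace gc {
constexpr size_t kCardShift = 10;
}

// Minimal stand-in for DEFINE_CHECK_EQ: fail the build if the constants drift.
static_assert(static_cast<size_t>(CARD_TABLE_CARD_SHIFT) == gc::kCardShift,
              "asm constant out of sync with C++ constant");

int main() { return 0; }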
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 4f390fd..8bdf6b1 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -34,7 +34,6 @@
 #include <time.h>
 #include <time.h>
 #include <unistd.h>
-
 #include <set>
 
 #include "android-base/stringprintf.h"
@@ -502,9 +501,16 @@
   void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
+  void DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void DumpHeapInstanceObject(mirror::Object* obj,
+                              mirror::Class* klass,
+                              const std::set<mirror::Object*>& fake_roots)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool AddRuntimeInternalObjectsField(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ProcessHeap(bool header_first)
       REQUIRES(Locks::mutator_lock_) {
     // Reset current heap and object count.
@@ -1062,37 +1068,17 @@
   ++objects_in_segment_;
 }
 
-// Use for visiting the GcRoots held live by ArtFields, ArtMethods, and ClassLoaders.
-class GcRootVisitor {
- public:
-  explicit GcRootVisitor(Hprof* hprof) : hprof_(hprof) {}
-
-  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
-                  MemberOffset offset ATTRIBUTE_UNUSED,
-                  bool is_static ATTRIBUTE_UNUSED) const {}
-
-  // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be
-  // running during the hprof dumping process.
-  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!root->IsNull()) {
-      VisitRoot(root);
-    }
+bool Hprof::AddRuntimeInternalObjectsField(mirror::Class* klass) {
+  if (klass->IsDexCacheClass()) {
+    return true;
   }
-
-  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Object* obj = root->AsMirrorPtr();
-    // The two cases are either classes or dex cache arrays. If it is a dex cache array, then use
-    // VM internal. Otherwise the object is a declaring class of an ArtField or ArtMethod or a
-    // class from a ClassLoader.
-    hprof_->VisitRoot(obj, RootInfo(obj->IsClass() ? kRootStickyClass : kRootVMInternal));
+  // IsClassLoaderClass is true for subclasses of ClassLoader, but we only want to add the fake
+  // field to the java.lang.ClassLoader class itself.
+  if (klass->IsClassLoaderClass() && klass->GetSuperClass()->IsObjectClass()) {
+    return true;
   }
-
-
- private:
-  Hprof* const hprof_;
-};
+  return false;
+}
 
 void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
@@ -1103,8 +1089,41 @@
 
   ++total_objects_;
 
-  GcRootVisitor visitor(this);
-  obj->VisitReferences(visitor, VoidFunctor());
+  class RootCollector {
+   public:
+    explicit RootCollector() {}
+
+    void operator()(mirror::Object*, MemberOffset, bool) const {}
+
+    // Note that these don't have read barriers. That's OK, however, since the GC is guaranteed
+    // not to be running during the hprof dumping process.
+    void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      if (!root->IsNull()) {
+        VisitRoot(root);
+      }
+    }
+
+    void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      roots_.insert(root->AsMirrorPtr());
+    }
+
+    const std::set<mirror::Object*>& GetRoots() const {
+      return roots_;
+    }
+
+   private:
+    // These roots are actually reachable from the object itself. Avoid reporting them as hprof
+    // roots to make it easier to debug class unloading.
+    mutable std::set<mirror::Object*> roots_;
+  };
+
+  RootCollector visitor;
+  // Collect all native roots.
+  if (!obj->IsClass()) {
+    obj->VisitReferences(visitor, VoidFunctor());
+  }
 
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true);
@@ -1112,15 +1131,18 @@
   if (space != nullptr) {
     if (space->IsZygoteSpace()) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
       // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects as
       // HPROF_HEAP_APP. b/35762934
       heap_type = HPROF_HEAP_IMAGE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   } else {
     const auto* los = heap->GetLargeObjectsSpace();
     if (los->Contains(obj) && los->IsZygoteLargeObject(Thread::Current(), obj)) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   }
   CheckHeapSegmentConstraints();
@@ -1164,7 +1186,7 @@
     } else if (c->IsArrayClass()) {
       DumpHeapArray(obj->AsArray(), c);
     } else {
-      DumpHeapInstanceObject(obj, c);
+      DumpHeapInstanceObject(obj, c, visitor.GetRoots());
     }
   }
 
@@ -1269,7 +1291,10 @@
 
   // Instance fields for this class (no superclass fields)
   int iFieldCount = klass->NumInstanceFields();
-  if (klass->IsStringClass()) {
+  // add_internal_runtime_objects is only for classes that may retain objects live through means
+  // other than fields. This is never the case for strings.
+  const bool add_internal_runtime_objects = AddRuntimeInternalObjectsField(klass);
+  if (klass->IsStringClass() || add_internal_runtime_objects) {
     __ AddU2((uint16_t)iFieldCount + 1);
   } else {
     __ AddU2((uint16_t)iFieldCount);
@@ -1284,6 +1309,21 @@
   if (klass->IsStringClass()) {
     __ AddStringId(LookupStringId("value"));
     __ AddU1(hprof_basic_object);
+  } else if (add_internal_runtime_objects) {
+    __ AddStringId(LookupStringId("runtimeInternalObjects"));
+    __ AddU1(hprof_basic_object);
+  }
+}
+
+void Hprof::DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) {
+  __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
+  __ AddObjectId(obj);
+  __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
+  __ AddU4(elements.size());
+  __ AddClassId(LookupClassId(
+      Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kObjectArrayClass)));
+  for (mirror::Object* e : elements) {
+    __ AddObjectId(e);
   }
 }
 
@@ -1327,7 +1367,9 @@
   }
 }
 
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj,
+                                   mirror::Class* klass,
+                                   const std::set<mirror::Object*>& fake_roots) {
   // obj is an instance object.
   __ AddU1(HPROF_INSTANCE_DUMP);
   __ AddObjectId(obj);
@@ -1341,6 +1383,7 @@
 
   // What we will use for the string value if the object is a string.
   mirror::Object* string_value = nullptr;
+  mirror::Object* fake_object_array = nullptr;
 
   // Write the instance data;  fields for this class, followed by super class fields, and so on.
   do {
@@ -1396,8 +1439,12 @@
         }
       }
       __ AddObjectId(string_value);
+    } else if (AddRuntimeInternalObjectsField(klass)) {
+      // We need an id that is guaranteed not to be used; use half of the object alignment.
+      fake_object_array = reinterpret_cast<mirror::Object*>(
+          reinterpret_cast<uintptr_t>(obj) + kObjectAlignment / 2);
+      __ AddObjectId(fake_object_array);
     }
-
     klass = klass->GetSuperClass();
   } while (klass != nullptr);
 
@@ -1419,6 +1466,8 @@
       __ AddU1(hprof_basic_char);
       __ AddU2List(s->GetValue(), s->GetLength());
     }
+  } else if (fake_object_array != nullptr) {
+    DumpFakeObjectArray(fake_object_array, fake_roots);
   }
 }
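The fake "runtimeInternalObjects" array above needs an hprof object ID that no real object can collide with. Because heap objects are aligned to kObjectAlignment, an address offset by half the alignment is guaranteed unused. A small sketch of that invariant; kObjectAlignment == 8 is an assumption here:

#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uintptr_t kObjectAlignment = 8;  // assumed; the real value lives in the runtime

// Synthesize an ID from an object's address that no aligned object can occupy.
uintptr_t FakeObjectId(uintptr_t obj_addr) {
  assert(obj_addr % kObjectAlignment == 0);  // real objects are aligned
  return obj_addr + kObjectAlignment / 2;    // misaligned, hence never a real object
}

int main() {
  uintptr_t obj = 0x7000;
  uintptr_t fake = FakeObjectId(obj);
  std::printf("obj=0x%zx fake=0x%zx (fake %% %zu = %zu)\n",
              static_cast<size_t>(obj), static_cast<size_t>(fake),
              static_cast<size_t>(kObjectAlignment),
              static_cast<size_t>(fake % kObjectAlignment));
  return 0;
}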
 
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 70be30c..96934bc 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -568,7 +568,7 @@
   // Copy in content.
   memcpy(h_array->GetData(), mem_map->Begin(), map_size);
   // Be proactive releasing memory.
-  mem_map.release();
+  mem_map.reset();
 
   // Create a ByteArrayInputStream.
   Handle<mirror::Class> h_class(hs.NewHandle(
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 971d039..e8a9904 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -335,7 +335,7 @@
   expandBufAdd1(reply, false);   // canUnrestrictedlyRedefineClasses
   expandBufAdd1(reply, false);   // canPopFrames
   expandBufAdd1(reply, true);    // canUseInstanceFilters
-  expandBufAdd1(reply, false);   // canGetSourceDebugExtension
+  expandBufAdd1(reply, true);    // canGetSourceDebugExtension
   expandBufAdd1(reply, false);   // canRequestVMDeathEvent
   expandBufAdd1(reply, false);   // canSetDefaultStratum
   expandBufAdd1(reply, true);    // 1.6: canGetInstanceInfo
@@ -499,13 +499,18 @@
 
 /*
  * Returns the value of the SourceDebugExtension attribute.
- *
- * JDB seems interested, but DEX files don't currently support this.
  */
-static JdwpError RT_SourceDebugExtension(JdwpState*, Request*, ExpandBuf*)
+static JdwpError RT_SourceDebugExtension(JdwpState*, Request* request, ExpandBuf* pReply)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   /* referenceTypeId in, string out */
-  return ERR_ABSENT_INFORMATION;
+  RefTypeId refTypeId = request->ReadRefTypeId();
+  std::string extension_data;
+  JdwpError status = Dbg::GetSourceDebugExtension(refTypeId, &extension_data);
+  if (status != ERR_NONE) {
+    return status;
+  }
+  expandBufAddUtf8String(pReply, extension_data);
+  return ERR_NONE;
 }
 
 static JdwpError RT_Signature(JdwpState*, Request* request, ExpandBuf* pReply, bool with_generic)
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index d190bdf..1441987 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -212,6 +212,10 @@
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+
+  // Resolve any new registered locations.
+  ResolveTrackedLocations();
+
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   std::set<DexCacheResolvedClasses> resolved_classes =
       class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
@@ -260,6 +264,10 @@
 
 bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+
+  // Resolve any new registered locations.
+  ResolveTrackedLocations();
+
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
     // Make a copy so that we don't hold the lock while doing I/O.
@@ -497,17 +505,34 @@
   return instance_ != nullptr;
 }
 
-void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
-                                       const std::vector<std::string>& code_paths) {
-  auto it = tracked_dex_base_locations_.find(output_filename);
-  if (it == tracked_dex_base_locations_.end()) {
-    tracked_dex_base_locations_.Put(output_filename,
-                                    std::set<std::string>(code_paths.begin(), code_paths.end()));
+static void AddTrackedLocationsToMap(const std::string& output_filename,
+                                     const std::vector<std::string>& code_paths,
+                                     SafeMap<std::string, std::set<std::string>>* map) {
+  auto it = map->find(output_filename);
+  if (it == map->end()) {
+    map->Put(output_filename, std::set<std::string>(code_paths.begin(), code_paths.end()));
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
+void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::vector<std::string>& code_paths) {
+  // Add the code paths to the list of tracked locations.
+  AddTrackedLocationsToMap(output_filename, code_paths, &tracked_dex_base_locations_);
+  // The code paths may contain symlinks which could fool the profiler.
+  // If the dex file is compiled with an absolute location but loaded through a symlink,
+  // the profiler could skip the dex file due to the location mismatch.
+  // To avoid this, we add the code paths to the temporary cache of 'to_be_resolved'
+  // locations. When the profiler thread executes we will resolve the paths to their
+  // real paths.
+  // Note that we delay taking the realpath to avoid spending more time than needed
+  // when registering location (as it is done during app launch).
+  AddTrackedLocationsToMap(output_filename,
+                           code_paths,
+                           &tracked_dex_base_locations_to_be_resolved_);
+}
+
 void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
@@ -556,4 +581,38 @@
   return false;
 }
 
+void ProfileSaver::ResolveTrackedLocations() {
+  SafeMap<std::string, std::set<std::string>> locations_to_be_resolved;
+  {
+    // Make a copy so that we don't hold the lock while doing I/O.
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    locations_to_be_resolved = tracked_dex_base_locations_to_be_resolved_;
+    tracked_dex_base_locations_to_be_resolved_.clear();
+  }
+
+  // Resolve the locations.
+  SafeMap<std::string, std::vector<std::string>> resolved_locations_map;
+  for (const auto& it : locations_to_be_resolved) {
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+    auto resolved_locations_it = resolved_locations_map.Put(
+        filename,
+        std::vector<std::string>(locations.size()));
+
+    for (const auto& location : locations) {
+      UniqueCPtr<const char[]> location_real(realpath(location.c_str(), nullptr));
+      // Note that it's ok if we cannot get the real path.
+      if (location_real != nullptr) {
+        resolved_locations_it->second.emplace_back(location_real.get());
+      }
+    }
+  }
+
+  // Add the resolved locations to the tracked collection.
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  for (const auto& it : resolved_locations_map) {
+    AddTrackedLocationsToMap(it.first, it.second, &tracked_dex_base_locations_);
+  }
+}
+
 }   // namespace art
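ResolveTrackedLocations() relies on realpath(path, nullptr), which allocates its result with malloc; ART's UniqueCPtr frees it automatically. The same idiom with only standard types (a sketch; the real saver skips paths it cannot resolve, while this helper falls back to the input):

#include <stdlib.h>  // realpath (POSIX)

#include <cstdio>
#include <cstdlib>
#include <memory>
#include <string>

std::string ResolveLocation(const std::string& location) {
  // realpath(..., nullptr) mallocs the resolved path; release it via unique_ptr.
  std::unique_ptr<char, decltype(&std::free)> real(
      realpath(location.c_str(), nullptr), &std::free);
  return real != nullptr ? std::string(real.get()) : location;
}

int main() {
  std::printf("%s\n", ResolveLocation("/tmp/.").c_str());
  return 0;
}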
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index be2bffc..bd539a4 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -112,6 +112,10 @@
 
   void DumpInfo(std::ostream& os);
 
+  // Resolve the realpath of the locations stored in tracked_dex_base_locations_to_be_resolved_
+  // and put the result in tracked_dex_base_locations_.
+  void ResolveTrackedLocations() REQUIRES(!Locks::profiler_lock_);
+
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
   // Profile saver thread.
@@ -119,11 +123,17 @@
 
   jit::JitCodeCache* jit_code_cache_;
 
-  // Collection of code paths that the profiles tracks.
+  // Collection of code paths that the profiler tracks.
   // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
 
+  // Collection of code paths that the profiler tracks but may not have been resolved
+  // to their realpath. The resolution is done asynchronously to minimize the time it
+  // takes to register a path.
+  SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_to_be_resolved_
+      GUARDED_BY(Locks::profiler_lock_);
+
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
   uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
   uint32_t jit_activity_notifications_;
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
index c8d256f..07aeb66 100644
--- a/runtime/jit/profile_saver_options.h
+++ b/runtime/jit/profile_saver_options.h
@@ -20,7 +20,7 @@
 
 struct ProfileSaverOptions {
  public:
-  static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
+  static constexpr uint32_t kMinSavePeriodMs = 40 * 1000;  // 40 seconds
   static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000;  // 5 seconds
   // Minimum number of JIT samples during launch to include a method into the profile.
   static constexpr uint32_t kStartupMethodSamples = 1;
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0617dae..77554e8 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -622,6 +622,31 @@
   return env->NewStringUTF(new_filter_str.c_str());
 }
 
+static jstring DexFile_getSafeModeCompilerFilter(JNIEnv* env,
+                                                 jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                                 jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return javaCompilerFilter;
+  }
+
+  CompilerFilter::Filter new_filter = CompilerFilter::GetSafeModeFilterFrom(filter);
+
+  // Filter stayed the same, return input.
+  if (filter == new_filter) {
+    return javaCompilerFilter;
+  }
+
+  // Create a new string object and return.
+  std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter);
+  return env->NewStringUTF(new_filter_str.c_str());
+}
+
 static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) {
   const OatFile* oat_file = nullptr;
   std::vector<const DexFile*> dex_files;
@@ -695,6 +720,9 @@
   NATIVE_METHOD(DexFile,
                 getNonProfileGuidedCompilerFilter,
                 "(Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile,
+                getSafeModeCompilerFilter,
+                "(Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile, getDexFileStatus,
                 "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 2eaa8c7..0515ec6 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -192,8 +192,8 @@
 
   const bool safe_mode = (debug_flags & DEBUG_ENABLE_SAFEMODE) != 0;
   if (safe_mode) {
-    // Ensure that any (secondary) oat files will be interpreted.
-    runtime->AddCompilerOption("--compiler-filter=interpret-only");
+    // Only quicken oat files.
+    runtime->AddCompilerOption("--compiler-filter=quicken");
     runtime->SetSafeMode(true);
     debug_flags &= ~DEBUG_ENABLE_SAFEMODE;
   }
diff --git a/runtime/oat.h b/runtime/oat.h
index 05706252..924f77c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '9', '\0' };  // Add thread_local_limit.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '3', '\0' };  // ARM Baker link-time thunks.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 4a85d479..9affeb0 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -193,7 +193,7 @@
                            bool writable,
                            bool low_4gb,
                            std::string* error_msg) {
-  vdex_ = VdexFile::Open(vdex_filename, writable, low_4gb, error_msg);
+  vdex_ = VdexFile::Open(vdex_filename, writable, low_4gb, /* unquicken */ false, error_msg);
   if (vdex_.get() == nullptr) {
     *error_msg = StringPrintf("Failed to load vdex file '%s' %s",
                               vdex_filename.c_str(),
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index a7be73a..603bbbf 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -68,19 +68,34 @@
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
-{ }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const char* oat_location,
-                                   const InstructionSet isa,
-                                   bool load_executable)
     : isa_(isa),
       load_executable_(load_executable),
       odex_(this, /*is_oat_location*/ false),
       oat_(this, /*is_oat_location*/ true) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
-  dex_location_.assign(dex_location);
+
+  // Try to get the realpath for the dex location.
+  //
+  // This is OK with respect to the dalvik cache naming scheme because we never
+  // generate oat files starting from symlinks which go into the dalvik cache.
+  // (Recall that the oat files in the dalvik cache are encoded by replacing '/'
+  // with '@' in the path.)
+  // The boot image oat files (which are symlinked in dalvik-cache) are not
+  // loaded via the oat file assistant.
+  //
+  // The only case when the dex location may resolve to a different path
+  // is for secondary dex files (e.g. /data/user/0 symlinks to /data/data and
+  // the app is free to create its own internal layout). Related to this it is
+  // worthwhile to mention that installd resolves the secondary dex location
+  // before calling dex2oat.
+  UniqueCPtr<const char[]> dex_location_real(realpath(dex_location, nullptr));
+  if (dex_location_real != nullptr) {
+    dex_location_.assign(dex_location_real.get());
+  } else {
+    // If we can't get the realpath of the location, there's not much point in trying to move on.
+    PLOG(ERROR) << "Could not get the realpath of dex_location " << dex_location;
+    return;
+  }
 
   if (load_executable_ && isa != kRuntimeISA) {
     LOG(WARNING) << "OatFileAssistant: Load executable specified, "
@@ -98,15 +113,27 @@
   }
 
   // Get the oat filename.
-  if (oat_location != nullptr) {
-    oat_.Reset(oat_location);
+  std::string oat_file_name;
+  if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
+    oat_.Reset(oat_file_name);
   } else {
-    std::string oat_file_name;
-    if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
-      oat_.Reset(oat_file_name);
-    } else {
-      LOG(WARNING) << "Failed to determine oat file name for dex location "
+    LOG(WARNING) << "Failed to determine oat file name for dex location "
         << dex_location_ << ": " << error_msg;
+  }
+
+  // Check if the dex directory is writable.
+  // This will be needed in most uses of OatFileAssistant, so it's OK to
+  // compute it eagerly. (The only caller that does not need it is
+  // OatFileAssistant::GetStatusDump().)
+  size_t pos = dex_location_.rfind('/');
+  if (pos == std::string::npos) {
+    LOG(WARNING) << "Failed to determine dex file parent directory: " << dex_location_;
+  } else {
+    std::string parent = dex_location_.substr(0, pos);
+    if (access(parent.c_str(), W_OK) == 0) {
+      dex_parent_writable_ = true;
+    } else {
+      VLOG(oat) << "Dex parent of " << dex_location_ << " is not writable: " << strerror(errno);
     }
   }
 }
@@ -139,12 +166,17 @@
   CHECK(error_msg != nullptr);
   CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired";
 
-  const std::string* oat_file_name = oat_.Filename();
-  if (oat_file_name == nullptr) {
-    *error_msg = "Failed to determine lock file";
-    return false;
-  }
-  std::string lock_file_name = *oat_file_name + ".flock";
+  // Note the lock will only succeed for secondary dex files and in test
+  // environments.
+  //
+  // The lock *will fail* for all primary apks in a production environment.
+  // The app does not have permission to create locks next to its dex location
+  // (be it the system, data or vendor partition). We also cannot use the odex or
+  // oat location, for the same reason.
+  //
+  // This is best effort and if it fails it's unlikely that we will be able
+  // to generate oat files anyway.
+  std::string lock_file_name = dex_location_ + "." + GetInstructionSetString(isa_) + ".flock";
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
     unlink(lock_file_name.c_str());
@@ -170,7 +202,7 @@
   CHECK(filter != nullptr);
   CHECK(error_msg != nullptr);
 
-  *filter = CompilerFilter::kDefaultCompilerFilter;
+  *filter = OatFileAssistant::kDefaultCompilerFilterForDexLoading;
   for (StringPiece option : Runtime::Current()->GetCompilerOptions()) {
     if (option.starts_with("--compiler-filter=")) {
       const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
@@ -207,7 +239,7 @@
     case kDex2OatForBootImage:
     case kDex2OatForRelocation:
     case kDex2OatForFilter:
-      return GenerateOatFile(error_msg);
+      return GenerateOatFileNoChecks(info, error_msg);
   }
   UNREACHABLE();
 }
@@ -439,7 +471,7 @@
     VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
   }
 
-  if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
+  if (CompilerFilter::IsAotCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
@@ -479,8 +511,110 @@
   return kOatUpToDate;
 }
 
-OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::GenerateOatFile(std::string* error_msg) {
+static bool DexLocationToOdexNames(const std::string& location,
+                                   InstructionSet isa,
+                                   std::string* odex_filename,
+                                   std::string* oat_dir,
+                                   std::string* isa_dir,
+                                   std::string* error_msg) {
+  CHECK(odex_filename != nullptr);
+  CHECK(error_msg != nullptr);
+
+  // The odex file name is formed by replacing the dex_location extension with
+  // .odex and inserting an oat/<isa> directory. For example:
+  //   location = /foo/bar/baz.jar
+  //   odex_location = /foo/bar/oat/<isa>/baz.odex
+
+  // Find the directory portion of the dex location and add the oat/<isa>
+  // directory.
+  size_t pos = location.rfind('/');
+  if (pos == std::string::npos) {
+    *error_msg = "Dex location " + location + " has no directory.";
+    return false;
+  }
+  std::string dir = location.substr(0, pos+1);
+  // Add the oat directory.
+  dir += "oat";
+  if (oat_dir != nullptr) {
+    *oat_dir = dir;
+  }
+  // Add the isa directory
+  dir += "/" + std::string(GetInstructionSetString(isa));
+  if (isa_dir != nullptr) {
+    *isa_dir = dir;
+  }
+
+  // Get the base part of the file without the extension.
+  std::string file = location.substr(pos+1);
+  pos = file.rfind('.');
+  if (pos == std::string::npos) {
+    *error_msg = "Dex location " + location + " has no extension.";
+    return false;
+  }
+  std::string base = file.substr(0, pos);
+
+  *odex_filename = dir + "/" + base + ".odex";
+  return true;
+}
+
+// Prepare a subcomponent of the odex directory.
+// (i.e. create and set the expected permissions on the path `dir`).
+static bool PrepareDirectory(const std::string& dir, std::string* error_msg) {
+  struct stat dir_stat;
+  if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &dir_stat)) == 0) {
+    // The directory exists. Check if it is indeed a directory.
+    if (!S_ISDIR(dir_stat.st_mode)) {
+      *error_msg = dir + " is not a dir";
+      return false;
+    } else {
+      // The dir is already on disk.
+      return true;
+    }
+  }
+
+  // Failed to stat. We need to create the directory.
+  if (errno != ENOENT) {
+    *error_msg = "Could not stat isa dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+
+  mode_t mode = S_IRWXU | S_IXGRP | S_IXOTH;
+  if (mkdir(dir.c_str(), mode) != 0) {
+    *error_msg = "Could not create dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+  if (chmod(dir.c_str(), mode) != 0) {
+    *error_msg = "Could not create the oat dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+  return true;
+}
+
+// Prepares the odex directory for the given dex location.
+static bool PrepareOdexDirectories(const std::string& dex_location,
+                                   const std::string& expected_odex_location,
+                                   InstructionSet isa,
+                                   std::string* error_msg) {
+  std::string actual_odex_location;
+  std::string oat_dir;
+  std::string isa_dir;
+  if (!DexLocationToOdexNames(
+        dex_location, isa, &actual_odex_location, &oat_dir, &isa_dir, error_msg)) {
+    return false;
+  }
+  DCHECK_EQ(expected_odex_location, actual_odex_location);
+
+  if (!PrepareDirectory(oat_dir, error_msg)) {
+    return false;
+  }
+  if (!PrepareDirectory(isa_dir, error_msg)) {
+    return false;
+  }
+  return true;
+}
+
+OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks(
+      OatFileAssistant::OatFileInfo& info, std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
@@ -490,22 +624,37 @@
     return kUpdateNotAttempted;
   }
 
-  if (oat_.Filename() == nullptr) {
+  if (info.Filename() == nullptr) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return kUpdateNotAttempted;
   }
-  const std::string& oat_file_name = *oat_.Filename();
+  const std::string& oat_file_name = *info.Filename();
   const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex");
 
   // dex2oat ignores missing dex files and doesn't report an error.
   // Check explicitly here so we can detect the error properly.
   // TODO: Why does dex2oat behave that way?
-  if (!OS::FileExists(dex_location_.c_str())) {
-    *error_msg = "Dex location " + dex_location_ + " does not exists.";
+  struct stat dex_path_stat;
+  if (TEMP_FAILURE_RETRY(stat(dex_location_.c_str(), &dex_path_stat)) != 0) {
+    *error_msg = "Could not access dex location " + dex_location_ + ":" + strerror(errno);
     return kUpdateNotAttempted;
   }
 
+  // If this is the odex location, we need to create the odex file layout (../oat/isa/..)
+  if (!info.IsOatLocation()) {
+    if (!PrepareOdexDirectories(dex_location_, oat_file_name, isa_, error_msg)) {
+      return kUpdateNotAttempted;
+    }
+  }
+
+  // Set the permissions for the oat and the vdex files.
+  // The user always gets read and write while the group and others propagate
+  // the reading access of the original dex file.
+  mode_t file_mode = S_IRUSR | S_IWUSR |
+      (dex_path_stat.st_mode & S_IRGRP) |
+      (dex_path_stat.st_mode & S_IROTH);
+
   std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str()));
   if (vdex_file.get() == nullptr) {
     *error_msg = "Generation of oat file " + oat_file_name
@@ -514,7 +663,7 @@
     return kUpdateNotAttempted;
   }
 
-  if (fchmod(vdex_file->Fd(), 0644) != 0) {
+  if (fchmod(vdex_file->Fd(), file_mode) != 0) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the vdex file " + vdex_file_name
       + " could not be made world readable.";
@@ -528,7 +677,7 @@
     return kUpdateNotAttempted;
   }
 
-  if (fchmod(oat_file->Fd(), 0644) != 0) {
+  if (fchmod(oat_file->Fd(), file_mode) != 0) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the oat file could not be made world readable.";
     oat_file->Erase();
@@ -563,8 +712,8 @@
     return kUpdateFailed;
   }
 
-  // Mark that the oat file has changed and we should try to reload.
-  oat_.Reset();
+  // Mark that the odex file has changed and we should try to reload.
+  info.Reset();
   return kUpdateSucceeded;
 }
 
@@ -623,35 +772,7 @@
                                                  InstructionSet isa,
                                                  std::string* odex_filename,
                                                  std::string* error_msg) {
-  CHECK(odex_filename != nullptr);
-  CHECK(error_msg != nullptr);
-
-  // The odex file name is formed by replacing the dex_location extension with
-  // .odex and inserting an oat/<isa> directory. For example:
-  //   location = /foo/bar/baz.jar
-  //   odex_location = /foo/bar/oat/<isa>/baz.odex
-
-  // Find the directory portion of the dex location and add the oat/<isa>
-  // directory.
-  size_t pos = location.rfind('/');
-  if (pos == std::string::npos) {
-    *error_msg = "Dex location " + location + " has no directory.";
-    return false;
-  }
-  std::string dir = location.substr(0, pos+1);
-  dir += "oat/" + std::string(GetInstructionSetString(isa));
-
-  // Get the base part of the file without the extension.
-  std::string file = location.substr(pos+1);
-  pos = file.rfind('.');
-  if (pos == std::string::npos) {
-    *error_msg = "Dex location " + location + " has no extension.";
-    return false;
-  }
-  std::string base = file.substr(0, pos);
-
-  *odex_filename = dir + "/" + base + ".odex";
-  return true;
+  return DexLocationToOdexNames(location, isa, odex_filename, nullptr, nullptr, error_msg);
 }
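// Editorial sketch, not part of the patch: a self-contained version of the
// mapping that DexLocationToOdexNames now centralizes, assuming the documented
// layout /foo/bar/baz.jar -> /foo/bar/oat/<isa>/baz.odex. Names here are
// illustrative, not the real API.
#include <string>

static bool DexToOdexSketch(const std::string& location,
                            const std::string& isa,
                            std::string* odex) {
  std::string::size_type slash = location.rfind('/');
  if (slash == std::string::npos) {
    return false;  // No directory component.
  }
  // Insert the oat/<isa> directory next to the dex file.
  std::string dir = location.substr(0, slash + 1) + "oat/" + isa;
  std::string file = location.substr(slash + 1);
  std::string::size_type dot = file.rfind('.');
  if (dot == std::string::npos) {
    return false;  // No extension to replace.
  }
  *odex = dir + "/" + file.substr(0, dot) + ".odex";
  return true;
}
// DexToOdexSketch("/foo/bar/baz.jar", "arm64", &out) sets out to
// "/foo/bar/oat/arm64/baz.odex".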
 
 bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
@@ -752,8 +873,45 @@
 }
 
 OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() {
-  bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen;
-  return use_oat ? oat_ : odex_;
+  // TODO(calin): Document the side effects of class loading when
+  // running dalvikvm command line.
+  if (dex_parent_writable_) {
+    // If the parent of the dex file is writable it means that we can
+    // create the odex file. In this case we unconditionally pick the odex
+    // as the best oat file. This corresponds to the regular use case when
+    // apps get installed or when they load private, secondary dex files.
+    // For apps on the system partition the odex location will not be
+    // writable and thus the oat location might be more up to date.
+    return odex_;
+  }
+
+  // We cannot write to the odex location. This must be a system app.
+
+  // If the oat location is usable take it.
+  if (oat_.IsUseable()) {
+    return oat_;
+  }
+
+  // The oat file is not usable but the odex file might be up to date.
+  // This is an indication that we are dealing with an up to date prebuilt
+  // (that doesn't need relocation).
+  if (odex_.Status() == kOatUpToDate) {
+    return odex_;
+  }
+
+  // The oat file is not usable and the odex file is not up to date.
+  // However we have access to the original dex file which means we can make
+  // the oat location up to date.
+  if (HasOriginalDexFiles()) {
+    return oat_;
+  }
+
+  // We got into the worst situation here:
+  // - the oat location is not usable
+  // - the prebuilt odex location is not up to date
+  // - and we don't have the original dex file anymore (stripped).
+  // Pick the odex if it exists, or the oat if not.
+  return (odex_.Status() == kOatCannotOpen) ? oat_ : odex_;
 }
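// Editorial sketch, not part of the patch: the selection order implemented by
// GetBestInfo above, condensed into a pure function. Names are illustrative.
enum class BestPick { kOdex, kOat };

static BestPick BestInfoSketch(bool dex_parent_writable,
                               bool oat_useable,
                               bool odex_up_to_date,
                               bool have_original_dex,
                               bool odex_can_open) {
  if (dex_parent_writable) return BestPick::kOdex;  // We can (re)generate the odex.
  if (oat_useable) return BestPick::kOat;           // System app with a usable oat.
  if (odex_up_to_date) return BestPick::kOdex;      // Up-to-date prebuilt odex.
  if (have_original_dex) return BestPick::kOat;     // We can bring the oat up to date.
  // Stripped dex and nothing usable: take whichever file exists.
  return odex_can_open ? BestPick::kOdex : BestPick::kOat;
}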
 
 std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
@@ -808,6 +966,7 @@
       std::unique_ptr<VdexFile> vdex = VdexFile::Open(vdex_filename,
                                                       /*writeable*/false,
                                                       /*low_4gb*/false,
+                                                      /*unquicken*/false,
                                                       &error_msg);
       if (vdex == nullptr) {
         status_ = kOatCannotOpen;
@@ -834,7 +993,7 @@
 
 OatFileAssistant::DexOptNeeded OatFileAssistant::OatFileInfo::GetDexOptNeeded(
     CompilerFilter::Filter target, bool profile_changed) {
-  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
+  bool compilation_desired = CompilerFilter::IsAotCompilationEnabled(target);
   bool filter_okay = CompilerFilterIsOkay(target, profile_changed);
 
   if (filter_okay && Status() == kOatUpToDate) {
@@ -848,24 +1007,24 @@
     return kNoDexOptNeeded;
   }
 
-  if (oat_file_assistant_->HasOriginalDexFiles()) {
-    if (filter_okay && Status() == kOatRelocationOutOfDate) {
-      return kDex2OatForRelocation;
-    }
-
-    if (IsUseable()) {
-      return kDex2OatForFilter;
-    }
-
-    if (Status() == kOatBootImageOutOfDate) {
-      return kDex2OatForBootImage;
-    }
-
-    return kDex2OatFromScratch;
+  if (filter_okay && Status() == kOatRelocationOutOfDate) {
+    return kDex2OatForRelocation;
   }
 
-  // Otherwise there is nothing we can do, even if we want to.
-  return kNoDexOptNeeded;
+  if (IsUseable()) {
+    return kDex2OatForFilter;
+  }
+
+  if (Status() == kOatBootImageOutOfDate) {
+    return kDex2OatForBootImage;
+  }
+
+  if (oat_file_assistant_->HasOriginalDexFiles()) {
+    return kDex2OatFromScratch;
+  } else {
+    // Otherwise there is nothing we can do, even if we want to.
+    return kNoDexOptNeeded;
+  }
 }
 
 const OatFile* OatFileAssistant::OatFileInfo::GetFile() {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index b84e711..7e2385e 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -47,6 +47,11 @@
 // dex location is in the boot class path.
 class OatFileAssistant {
  public:
+  // The default compiler filter to use when optimizing dex files at load time if they
+  // are out of date.
+  static const CompilerFilter::Filter kDefaultCompilerFilterForDexLoading =
+      CompilerFilter::kQuicken;
+
   enum DexOptNeeded {
     // No dexopt should (or can) be done to update the apk/jar.
     // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0
@@ -117,13 +122,6 @@
                    const InstructionSet isa,
                    bool load_executable);
 
-  // Constructs an OatFileAssistant, providing an explicit target oat_location
-  // to use instead of the standard oat location.
-  OatFileAssistant(const char* dex_location,
-                   const char* oat_location,
-                   const InstructionSet isa,
-                   bool load_executable);
-
   ~OatFileAssistant();
 
   // Returns true if the dex location refers to an element of the boot class
@@ -232,16 +230,6 @@
   // Returns the status of the oat file for the dex location.
   OatStatus OatFileStatus();
 
-  // Generate the oat file from the dex file using the current runtime
-  // compiler options.
-  // This does not check the current status before attempting to generate the
-  // oat file.
-  //
-  // If the result is not kUpdateSucceeded, the value of error_msg will be set
-  // to a string describing why there was a failure or the update was not
-  // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg);
-
   // Executes dex2oat using the current runtime configuration overridden with
   // the given arguments. This does not check to see if dex2oat is enabled in
   // the runtime configuration.
@@ -377,6 +365,16 @@
     bool file_released_ = false;
   };
 
+  // Generate the oat file for the given info from the dex file using the
+  // current runtime compiler options.
+  // This does not check the current status before attempting to generate the
+  // oat file.
+  //
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg);
+
   // Return info for the best oat file.
   OatFileInfo& GetBestInfo();
 
@@ -422,6 +420,9 @@
 
   std::string dex_location_;
 
+  // Whether or not the parent directory of the dex file is writable.
+  bool dex_parent_writable_ = false;
+
   // In a properly constructed OatFileAssistant object, isa_ should be either
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
@@ -446,6 +447,8 @@
   bool image_info_load_attempted_ = false;
   std::unique_ptr<ImageInfo> cached_image_info_;
 
+  friend class OatFileAssistantTest;
+
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 4a738ab..b2b86ee 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -43,6 +43,38 @@
   }
 };
 
+class ScopedNonWritable {
+ public:
+  explicit ScopedNonWritable(const std::string& dex_location) {
+    is_valid_ = false;
+    size_t pos = dex_location.rfind('/');
+    if (pos != std::string::npos) {
+      is_valid_ = true;
+      dex_parent_ = dex_location.substr(0, pos);
+      if (chmod(dex_parent_.c_str(), 0555) != 0)  {
+        PLOG(ERROR) << "Could not change permissions on " << dex_parent_;
+      }
+    }
+  }
+
+  bool IsSuccessful() { return is_valid_ && (access(dex_parent_.c_str(), W_OK) != 0); }
+
+  ~ScopedNonWritable() {
+    if (is_valid_) {
+      if (chmod(dex_parent_.c_str(), 0777) != 0) {
+        PLOG(ERROR) << "Could not restore permissions on " << dex_parent_;
+      }
+    }
+  }
+
+ private:
+  std::string dex_parent_;
+  bool is_valid_;
+};
+
+static bool IsExecutedAsRoot() {
+  return geteuid() == 0;
+}
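// Editorial note, not part of the patch: ScopedNonWritable is an RAII guard.
// It chmods the dex file's parent directory to 0555 on construction and
// restores 0777 on destruction. IsSuccessful() additionally verifies with
// access(W_OK) that write permission is really gone; root bypasses permission
// bits, which is why the tests below skip when running as root (b/38000545).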
 
 // Case: We have a DEX file, but no OAT file for it.
 // Expect: The status is kDex2OatNeeded.
@@ -53,9 +85,9 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
@@ -87,21 +119,134 @@
   EXPECT_EQ(nullptr, oat_file.get());
 }
 
+// Case: We have a DEX file and a PIC ODEX file, but no OAT file.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
+TEST_F(OatFileAssistantTest, OdexUpToDate) {
+  std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar";
+  std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex";
+  Copy(GetDexSrc1(), dex_location);
+  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+
+  // The dex parent is writable here, so the odex location is picked as the best file.
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and a PIC ODEX file, but no OAT file. We load the dex
+// file via a symlink.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
+TEST_F(OatFileAssistantTest, OdexUpToDateSymLink) {
+  std::string scratch_dir = GetScratchDir();
+  std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar";
+  std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex";
+
+  Copy(GetDexSrc1(), dex_location);
+  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+
+  // Now replace the dex location with a symlink.
+  std::string link = scratch_dir + "/link";
+  ASSERT_EQ(0, symlink(scratch_dir.c_str(), link.c_str()));
+  dex_location = link + "/OdexUpToDate.jar";
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, OatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  // Force the use of the oat location by making the dex parent not writable.
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and up-to-date OAT file for it. We load the dex file
+// via a symlink.
+// Expect: The status is kNoDexOptNeeded.
+TEST_F(OatFileAssistantTest, OatUpToDateSymLink) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
+  std::string real = GetScratchDir() + "/real";
+  ASSERT_EQ(0, mkdir(real.c_str(), 0700));
+  std::string link = GetScratchDir() + "/link";
+  ASSERT_EQ(0, symlink(real.c_str(), link.c_str()));
+
+  std::string dex_location = real + "/OatUpToDate.jar";
+
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
+
+  // Update the dex location to point to the symlink.
+  dex_location = link + "/OatUpToDate.jar";
+
+  // Force the use of the oat location by making the dex parent not writable.
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
@@ -120,19 +265,16 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOdex.jar";
-  std::string oat_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat";
+  std::string odex_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat";
 
   Copy(GetDexSrc1(), dex_location);
 
   // Generating and deleting the oat file should have the side effect of
   // creating an up-to-date vdex file.
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   // Even though the vdex file is up to date, because we don't have the oat
   // file, we can't know that the vdex depends on the boot image and is up to
@@ -169,6 +311,11 @@
   if (!kIsVdexEnabled) {
     return;
   }
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
 
   std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOat.jar";
   std::string oat_location;
@@ -180,6 +327,8 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   ASSERT_EQ(0, unlink(oat_location.c_str()));
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   // Even though the vdex file is up to date, because we don't have the oat
@@ -195,20 +344,29 @@
 // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
 // kDex2Oat if the profile has changed.
 TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken, false));
   EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
   EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken, true));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
@@ -219,10 +377,19 @@
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar";
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
@@ -240,6 +407,12 @@
 // Case: We have a MultiDEX file where the non-main multdex entry is out of date.
 // Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/MultiDexNonMainOutOfDate.jar";
 
   // Compile code for GetMultiDexSrc1.
@@ -250,6 +423,9 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
@@ -269,7 +445,7 @@
   // Compile the odex from GetMultiDexSrc2, which has a different non-main
   // dex checksum.
   Copy(GetMultiDexSrc2(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kInterpretOnly);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kQuicken);
 
   // Strip the dex file.
   Copy(GetStrippedDexSrc1(), dex_location);
@@ -287,12 +463,12 @@
   EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus());
 }
 
-// Case: We have a MultiDEX file and up-to-date OAT file for it with relative
+// Case: We have a MultiDEX file and up-to-date ODEX file for it with relative
 // encoded dex locations.
 // Expect: The oat file status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) {
   std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar";
-  std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat";
+  std::string odex_location = GetOdexDir() + "/RelativeEncodedDexLocation.odex";
 
   // Create the dex file
   Copy(GetMultiDexSrc1(), dex_location);
@@ -301,16 +477,15 @@
   std::vector<std::string> args;
   args.push_back("--dex-file=" + dex_location);
   args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar"));
-  args.push_back("--oat-file=" + oat_location);
+  args.push_back("--oat-file=" + odex_location);
   args.push_back("--compiler-filter=speed");
 
   std::string error_msg;
   ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
 
   // Verify we can load both dex files.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -322,6 +497,12 @@
 // Case: We have a DEX file and an OAT file out of date with respect to the
 // dex checksum.
 TEST_F(OatFileAssistantTest, OatDexOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar";
 
   // We create a dex, generate an oat for it, then overwrite the dex with a
@@ -330,9 +511,12 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -351,17 +535,14 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexDexOutOfDate.jar";
-  std::string oat_location = GetOdexDir() + "/VdexDexOutOfDate.oat";
+  std::string odex_location = GetOdexDir() + "/VdexDexOutOfDate.oat";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -376,17 +557,14 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexMultiDexNonMainOutOfDate.jar";
-  std::string oat_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.oat";
+  std::string odex_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.odex";
 
   Copy(GetMultiDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -395,6 +573,12 @@
 // Case: We have a DEX file and an OAT file out of date with respect to the
 // boot image.
 TEST_F(OatFileAssistantTest, OatImageOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
@@ -404,11 +588,14 @@
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -423,20 +610,29 @@
 // It shouldn't matter that the OAT file is out of date, because it is
 // verify-at-runtime.
 TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(),
-                     CompilerFilter::kVerifyAtRuntime,
+                     CompilerFilter::kExtract,
                      /*relocate*/true,
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
@@ -457,7 +653,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -523,10 +719,10 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,  // Can't run dex2oat because dex file is stripped.
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,  // Compiling from the .vdex file
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
@@ -556,9 +752,9 @@
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
@@ -586,24 +782,23 @@
 TEST_F(OatFileAssistantTest, OdexOatOverlap) {
   std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar";
   std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex";
-  std::string oat_location = GetOdexDir() + "/OdexOatOverlap.oat";
 
-  // Create the dex and odex files
+  // Create the dex, the odex and the oat files.
   Copy(GetDexSrc1(), dex_location);
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
-
-  // Create the oat file by copying the odex so they are located in the same
-  // place in memory.
-  Copy(odex_location, oat_location);
+  GenerateOatForTest(dex_location.c_str(),
+                     CompilerFilter::kSpeed,
+                     /*relocate*/false,
+                     /*pic*/false,
+                     /*with_alternate_image*/false);
 
   // Verify things don't go bad.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  // kDex2OatForRelocation is expected rather than -kDex2OatForRelocation
-  // based on the assumption that the oat location is more up-to-date than the odex
+  // -kDex2OatForRelocation is expected rather than kDex2OatForRelocation
+  // based on the assumption that the odex location is more up-to-date than the oat
   // location, even if they both need relocation.
-  EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation,
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
@@ -621,30 +816,6 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a DEX file and a PIC ODEX file, but no OAT file.
-// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
-TEST_F(OatFileAssistantTest, DexPicOdexNoOat) {
-  std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar";
-  std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex";
-
-  // Create the dex and odex files
-  Copy(GetDexSrc1(), dex_location);
-  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
-
-  // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
-  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file.
 // Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code.
 TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) {
@@ -653,13 +824,13 @@
 
   // Create the dex and odex files
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kExtract);
 
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -672,11 +843,20 @@
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: We should load an executable dex file.
 TEST_F(OatFileAssistantTest, LoadOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
@@ -688,13 +868,22 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a DEX file and up-to-date interpret-only OAT file for it.
+// Case: We have a DEX file and up-to-date quicken OAT file for it.
 // Expect: We should still load the oat file as executable.
 TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly);
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kQuicken);
+
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
 
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
@@ -710,9 +899,19 @@
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: Loading non-executable should load the oat non-executable.
 TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
+
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
@@ -726,70 +925,33 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a DEX file.
-// Expect: We should load an executable dex file from an alternative oat
-// location.
-TEST_F(OatFileAssistantTest, LoadDexNoAlternateOat) {
-  std::string dex_location = GetScratchDir() + "/LoadDexNoAlternateOat.jar";
-  std::string oat_location = GetScratchDir() + "/LoadDexNoAlternateOat.oat";
-
-  Copy(GetDexSrc1(), dex_location);
-
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
-  std::string error_msg;
-  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
-
-  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-  ASSERT_TRUE(oat_file.get() != nullptr);
-  EXPECT_TRUE(oat_file->IsExecutable());
-  std::vector<std::unique_ptr<const DexFile>> dex_files;
-  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
-  EXPECT_EQ(1u, dex_files.size());
-
-  EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
-
-  // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus());
-}
-
-// Case: We have a DEX file but can't write the oat file.
-// Expect: We should fail to make the oat file up to date.
-TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) {
-  std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar";
-
-  // Make the oat location unwritable by inserting some non-existent
-  // intermediate directories.
-  std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat";
-
-  Copy(GetDexSrc1(), dex_location);
-
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
-  std::string error_msg;
-  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(false, &error_msg));
-
-  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-  ASSERT_TRUE(oat_file.get() == nullptr);
-}
-
 // Case: We don't have a DEX file and can't write the oat file.
 // Expect: We should fail to generate the oat file without crashing.
 TEST_F(OatFileAssistantTest, GenNoDex) {
-  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
-  std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
 
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
+
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.GenerateOatFile(&error_msg));
+  // We should get kUpdateSucceeded from MakeUpToDate since there's nothing
+  // that can be done in this situation.
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
+
+  // Verify it didn't create an oat in the default location (dalvik-cache).
+  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus());
+  // Verify it didn't create the odex file in the default location (../oat/isa/...odex)
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OdexFileStatus());
 }
 
 // Turn an absolute path into a path relative to the current working
@@ -1003,19 +1165,19 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::string error_msg;
-  Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
+  Runtime::Current()->AddCompilerOption("--compiler-filter=quicken");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
       oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
       oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 6799918..c1cf800 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -546,8 +546,8 @@
   std::vector<const DexFile*> dex_files_loaded;
 
   // Try to get dex files from the given class loader. If the class loader is null, or we do
-  // not support one of the class loaders in the chain, conservatively compare against all
-  // (non-boot) oat files.
+  // not support one of the class loaders in the chain, we do nothing and assume the collision
+  // check has succeeded.
   bool class_loader_ok = false;
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -566,37 +566,20 @@
     } else if (h_class_loader != nullptr) {
       VLOG(class_linker) << "Something unsupported with "
                          << mirror::Class::PrettyClass(h_class_loader->GetClass());
+
+      // This is a class loader we don't recognize. Our earlier strategy was
+      // to perform a global duplicate class check (with all loaded oat files)
+      // but that seems overly conservative - we have no way of knowing that
+      // those files are present in the same loader hierarchy. Among other
+      // things, it hurt GMS core and its filtering class loader.
     }
   }
 
-  // Dex files are registered late - once a class is actually being loaded. We have to compare
-  // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
-  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
-
-  // Vector that holds the newly opened dex files live, this is done to prevent leaks.
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
-
+  // Exit if we find a class loader we don't recognize. Proceed to check shared
+  // libraries and do a full class loader check otherwise.
   if (!class_loader_ok) {
-    // Add dex files from already loaded oat files, but skip boot.
-
-    // Clean up the dex files.
-    dex_files_loaded.clear();
-
-    std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
-    // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
-    // need to check both against each other since they would have resolved the same way at compile
-    // time.
-    std::unordered_set<std::string> unique_locations;
-    for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
-      DCHECK_NE(loaded_oat_file.get(), oat_file);
-      const std::string& location = loaded_oat_file->GetLocation();
-      if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) ==
-          boot_oat_files.end() && location != oat_file->GetLocation() &&
-          unique_locations.find(location) == unique_locations.end()) {
-        unique_locations.insert(location);
-        AddDexFilesFromOat(loaded_oat_file.get(), &dex_files_loaded, &opened_dex_files);
-      }
-    }
+      LOG(WARNING) << "Skipping duplicate class check due to unrecognized classloader";
+      return false;
   }
 
   // Exit if shared libraries are ok. Do a full duplicate classes check otherwise.
@@ -606,6 +589,9 @@
     return false;
   }
 
+  // Vector that holds the newly opened dex files live, this is done to prevent leaks.
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+
   ScopedTrace st("Collision check");
   // Add dex files from the oat file to check.
   std::vector<const DexFile*> dex_files_unloaded;
@@ -629,9 +615,7 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   Runtime* const runtime = Runtime::Current();
 
-  // TODO(calin): remove the explicit oat_location for OatFileAssistant
   OatFileAssistant oat_file_assistant(dex_location,
-                                      /*oat_location*/ nullptr,
                                       kRuntimeISA,
                                       !runtime->IsAotCompiler());
 
@@ -677,21 +661,34 @@
     if (!accept_oat_file) {
       // Failed the collision check. Print warning.
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
-                     << dex_location;
+        if (!oat_file_assistant.HasOriginalDexFiles()) {
+          // We need to fallback but don't have original dex files. We have to
+          // fallback to opening the existing oat file. This is potentially
+          // unsafe so we warn about it.
+          accept_oat_file = true;
+
+          LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                       << "Allow oat file use. This is potentially dangerous.";
+        } else {
+          // We have to fallback and found original dex files - extract them from an APK.
+          // Also warn about this operation because it's potentially wasteful.
+          LOG(WARNING) << "Found duplicate classes, falling back to extracting from APK : "
+                       << dex_location;
+          LOG(WARNING) << "NOTE: This wastes RAM and hurts startup performance.";
+        }
       } else {
+        // TODO: We should remove this. The fact that we're here implies -Xno-dex-file-fallback
+        // was set, which means that we should never fallback. If we don't have original dex
+        // files, we should just fail resolution as the flag intended.
+        if (!oat_file_assistant.HasOriginalDexFiles()) {
+          accept_oat_file = true;
+        }
+
         LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
                         " load classes for " << dex_location;
       }
-      LOG(WARNING) << error_msg;
 
-      // However, if the app was part of /system and preopted, there is no original dex file
-      // available. In that case grudgingly accept the oat file.
-      if (!oat_file_assistant.HasOriginalDexFiles()) {
-        accept_oat_file = true;
-        LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
-                     << "Allow oat file use. This is potentially dangerous.";
-      }
+      LOG(WARNING) << error_msg;
     }
 
     if (accept_oat_file) {
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index c3a94b9..4c00317 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1078,9 +1078,156 @@
                                           jint* extension_count_ptr,
                                           jvmtiExtensionFunctionInfo** extensions) {
     ENSURE_VALID_ENV(env);
-    // We do not have any extension functions.
-    *extension_count_ptr = 0;
-    *extensions = nullptr;
+    ENSURE_NON_NULL(extension_count_ptr);
+    ENSURE_NON_NULL(extensions);
+
+    std::vector<jvmtiExtensionFunctionInfo> ext_vector;
+
+    // Holders for allocated values.
+    std::vector<JvmtiUniquePtr<char[]>> char_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiParamInfo[]>> param_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiError[]>> error_buffers;
+
+    // Add a helper struct that takes an arbitrary const char*. add_extension will use Allocate
+    // appropriately.
+    struct CParamInfo {
+      const char* name;
+      jvmtiParamKind kind;
+      jvmtiParamTypes base_type;
+      jboolean null_ok;
+    };
+
+    auto add_extension = [&](jvmtiExtensionFunction func,
+                             const char* id,
+                             const char* short_description,
+                             jint param_count,
+                             const std::vector<CParamInfo>& params,
+                             jint error_count,
+                             const std::vector<jvmtiError>& errors) {
+      jvmtiExtensionFunctionInfo func_info;
+      jvmtiError error;
+
+      func_info.func = func;
+
+      JvmtiUniquePtr<char[]> id_ptr = CopyString(env, id, &error);
+      if (id_ptr == nullptr) {
+        return error;
+      }
+      func_info.id = id_ptr.get();
+      char_buffers.push_back(std::move(id_ptr));
+
+      JvmtiUniquePtr<char[]> descr = CopyString(env, short_description, &error);
+      if (descr == nullptr) {
+        return error;
+      }
+      func_info.short_description = descr.get();
+      char_buffers.push_back(std::move(descr));
+
+      func_info.param_count = param_count;
+      if (param_count > 0) {
+        JvmtiUniquePtr<jvmtiParamInfo[]> params_ptr =
+            AllocJvmtiUniquePtr<jvmtiParamInfo[]>(env, param_count, &error);
+        if (params_ptr == nullptr) {
+          return error;
+        }
+        func_info.params = params_ptr.get();
+        param_buffers.push_back(std::move(params_ptr));
+
+        for (jint i = 0; i != param_count; ++i) {
+          JvmtiUniquePtr<char[]> param_name = CopyString(env, params[i].name, &error);
+          if (param_name == nullptr) {
+            return error;
+          }
+          func_info.params[i].name = param_name.get();
+          char_buffers.push_back(std::move(param_name));
+
+          func_info.params[i].kind = params[i].kind;
+          func_info.params[i].base_type = params[i].base_type;
+          func_info.params[i].null_ok = params[i].null_ok;
+        }
+      } else {
+        func_info.params = nullptr;
+      }
+
+      func_info.error_count = error_count;
+      if (error_count > 0) {
+        JvmtiUniquePtr<jvmtiError[]> errors_ptr =
+            AllocJvmtiUniquePtr<jvmtiError[]>(env, error_count, &error);
+        if (errors_ptr == nullptr) {
+          return error;
+        }
+        func_info.errors = errors_ptr.get();
+        error_buffers.push_back(std::move(errors_ptr));
+
+        for (jint i = 0; i != error_count; ++i) {
+          func_info.errors[i] = errors[i];
+        }
+      } else {
+        func_info.errors = nullptr;
+      }
+
+      ext_vector.push_back(func_info);
+
+      return ERR(NONE);
+    };
+
+    jvmtiError error;
+
+    // Heap extensions.
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetObjectHeapId),
+        "com.android.art.heap.get_object_heap_id",
+        "Retrieve the heap id of the the object tagged with the given argument. An "
+            "arbitrary object is chosen if multiple objects exist with the same tag.",
+        2,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            { "tag", JVMTI_KIND_IN, JVMTI_TYPE_JLONG, false},
+            { "heap_id", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false}
+        },
+        1,
+        { JVMTI_ERROR_NOT_FOUND });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetHeapName),
+        "com.android.art.heap.get_heap_name",
+        "Retrieve the name of the heap with the given id.",
+        2,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            { "heap_id", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false},
+            { "heap_name", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_CCHAR, false}
+        },
+        1,
+        { JVMTI_ERROR_ILLEGAL_ARGUMENT });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    // Copy into output buffer.
+
+    *extension_count_ptr = ext_vector.size();
+    JvmtiUniquePtr<jvmtiExtensionFunctionInfo[]> out_data =
+        AllocJvmtiUniquePtr<jvmtiExtensionFunctionInfo[]>(env, ext_vector.size(), &error);
+    if (out_data == nullptr) {
+      return error;
+    }
+    memcpy(out_data.get(),
+           ext_vector.data(),
+           ext_vector.size() * sizeof(jvmtiExtensionFunctionInfo));
+    *extensions = out_data.release();
+
+    // Release all the buffer holders; ownership now rests with the caller.
+    for (auto& holder : char_buffers) {
+      holder.release();
+    }
+    for (auto& holder : param_buffers) {
+      holder.release();
+    }
+    for (auto& holder : error_buffers) {
+      holder.release();
+    }
 
     return ERR(NONE);
   }
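// Editorial sketch, not part of the patch: how a JVMTI agent might look up and
// call the get_object_heap_id extension registered above. The function-pointer
// type is an assumption derived from the parameter list passed to
// add_extension; CallGetObjectHeapId is a hypothetical helper name.
#include <cstring>
#include <jvmti.h>

using GetObjectHeapIdFn = jvmtiError (*)(jvmtiEnv* env, jlong tag, jint* heap_id, ...);

static jvmtiError CallGetObjectHeapId(jvmtiEnv* jvmti, jlong tag, jint* heap_id) {
  jint count = 0;
  jvmtiExtensionFunctionInfo* infos = nullptr;
  jvmtiError err = jvmti->GetExtensionFunctions(&count, &infos);
  if (err != JVMTI_ERROR_NONE) {
    return err;
  }
  err = JVMTI_ERROR_NOT_FOUND;
  for (jint i = 0; i < count; ++i) {
    if (std::strcmp(infos[i].id, "com.android.art.heap.get_object_heap_id") == 0) {
      err = reinterpret_cast<GetObjectHeapIdFn>(infos[i].func)(jvmti, tag, heap_id);
      break;
    }
  }
  // A complete agent would also Deallocate each id/short_description/params/
  // errors buffer; GetExtensionFunctions transfers ownership to the caller.
  jvmti->Deallocate(reinterpret_cast<unsigned char*>(infos));
  return err;
}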
diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.cc b/runtime/openjdkjvmti/fixed_up_dex_file.cc
index 3338358..29aebae 100644
--- a/runtime/openjdkjvmti/fixed_up_dex_file.cc
+++ b/runtime/openjdkjvmti/fixed_up_dex_file.cc
@@ -32,10 +32,8 @@
 #include "fixed_up_dex_file.h"
 #include "dex_file-inl.h"
 
-// Compiler includes.
-#include "dex/dex_to_dex_decompiler.h"
-
 // Runtime includes.
+#include "dex_to_dex_decompiler.h"
 #include "oat_file.h"
 #include "vdex_file.h"
 
diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
index f67fffc..64ab3e7 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
@@ -384,6 +384,23 @@
   return ERR(NONE);
 }
 
+template <typename T>
+art::mirror::Object* JvmtiWeakTable<T>::Find(T tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  for (auto& pair : tagged_objects_) {
+    if (tag == pair.second) {
+      art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>();
+      if (obj != nullptr) {
+        return obj;
+      }
+    }
+  }
+  return nullptr;
+}
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
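
Find() above is a linear scan that returns the first still-live object carrying the tag, which is what makes the "arbitrary object is chosen" wording in the extension description honest. A small runtime-internal sketch (hypothetical helper, assuming the lock contract declared in the header):

// Resolve a tag to some live object, or nullptr when nothing live carries it.
art::mirror::Object* ResolveTag(openjdkjvmti::ObjectTagTable* table, jlong tag)
    REQUIRES_SHARED(art::Locks::mutator_lock_) {
  // If two objects share the tag, either may be returned; dead entries are skipped.
  return table->Find(tag);
}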
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
index eeea75a..a6fd247 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -116,6 +116,10 @@
   void Unlock() RELEASE(allow_disallow_lock_);
   void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
 
+  art::mirror::Object* Find(T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
  protected:
   // Should HandleNullSweep be called when Sweep detects the release of an object?
   virtual bool DoesHandleNullOnSweep() {
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 7fc5104..9b4dcaa 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -1400,4 +1400,95 @@
 
   return ERR(NONE);
 }
+
+static constexpr jint kHeapIdDefault = 0;
+static constexpr jint kHeapIdImage = 1;
+static constexpr jint kHeapIdZygote = 2;
+static constexpr jint kHeapIdApp = 3;
+
+jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) {
+  if (heap_id == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::Thread* self = art::Thread::Current();
+
+  auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get();
+    art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag);
+    if (obj == nullptr) {
+      return ERR(NOT_FOUND);
+    }
+
+    art::gc::Heap* const heap = art::Runtime::Current()->GetHeap();
+    const art::gc::space::ContinuousSpace* const space =
+        heap->FindContinuousSpaceFromObject(obj, true);
+    jint heap_type = kHeapIdApp;
+    if (space != nullptr) {
+      if (space->IsZygoteSpace()) {
+        heap_type = kHeapIdZygote;
+      } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
+        // Only count objects in the boot image as HPROF_HEAP_IMAGE; this leaves app
+        // image objects as HPROF_HEAP_APP. b/35762934
+        heap_type = kHeapIdImage;
+      }
+    } else {
+      const auto* los = heap->GetLargeObjectsSpace();
+      if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(self, obj.Ptr())) {
+        heap_type = kHeapIdZygote;
+      }
+    }
+    *heap_id = heap_type;
+    return ERR(NONE);
+  };
+
+  if (!art::Locks::mutator_lock_->IsSharedHeld(self)) {
+    if (!self->IsThreadSuspensionAllowable()) {
+      return ERR(INTERNAL);
+    }
+    art::ScopedObjectAccess soa(self);
+    return work();
+  } else {
+    // We cannot use SOA in this case. We might be holding the lock, but may not be in the
+    // runnable state (e.g., during GC).
+    art::Locks::mutator_lock_->AssertSharedHeld(self);
+    // TODO: Investigate why ASSERT_SHARED_CAPABILITY doesn't work.
+    auto annotalysis_workaround = [&]() NO_THREAD_SAFETY_ANALYSIS {
+      return work();
+    };
+    return annotalysis_workaround();
+  }
+}
+
+static jvmtiError CopyStringAndReturn(jvmtiEnv* env, const char* in, char** out) {
+  jvmtiError error;
+  JvmtiUniquePtr<char[]> param_name = CopyString(env, in, &error);
+  if (param_name == nullptr) {
+    return error;
+  }
+  *out = param_name.release();
+  return ERR(NONE);
+}
+
+static constexpr const char* kHeapIdDefaultName = "default";
+static constexpr const char* kHeapIdImageName = "image";
+static constexpr const char* kHeapIdZygoteName = "zygote";
+static constexpr const char* kHeapIdAppName = "app";
+
+jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...) {
+  switch (heap_id) {
+    case kHeapIdDefault:
+      return CopyStringAndReturn(env, kHeapIdDefaultName, heap_name);
+    case kHeapIdImage:
+      return CopyStringAndReturn(env, kHeapIdImageName, heap_name);
+    case kHeapIdZygote:
+      return CopyStringAndReturn(env, kHeapIdZygoteName, heap_name);
+    case kHeapIdApp:
+      return CopyStringAndReturn(env, kHeapIdAppName, heap_name);
+
+    default:
+      return ERR(ILLEGAL_ARGUMENT);
+  }
+}
+
 }  // namespace openjdkjvmti
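
Taken together, the two functions map a tag to a heap id and the id to a printable name. A runtime-internal usage sketch (the tag value is hypothetical; agents would reach these through the JVMTI extension mechanism instead):

jint heap_id = -1;
if (HeapExtensions::GetObjectHeapId(env, /* tag */ 42, &heap_id) == ERR(NONE)) {
  char* name = nullptr;
  if (HeapExtensions::GetHeapName(env, heap_id, &name) == ERR(NONE)) {
    // name is one of "default", "image", "zygote", "app". It is JVMTI-allocated,
    // so it must be released with Deallocate.
    env->Deallocate(reinterpret_cast<unsigned char*>(name));
  }
}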
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
index dccecb4..b4b71ba 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -56,6 +56,12 @@
   ObjectTagTable* tags_;
 };
 
+class HeapExtensions {
+ public:
+  static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...);
+  static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...);
+};
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0125539..b1acec6 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -336,6 +336,16 @@
     jit_->DeleteThreadPool();
   }
 
+  // Make sure our internal threads are dead before we start tearing down things they're using.
+  Dbg::StopJdwp();
+  delete signal_catcher_;
+
+  // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
+  {
+    ScopedTrace trace2("Delete thread list");
+    thread_list_->ShutDown();
+  }
+
   // TODO Maybe do some locking.
   for (auto& agent : agents_) {
     agent.Unload();
@@ -346,15 +356,9 @@
     plugin.Unload();
   }
 
-  // Make sure our internal threads are dead before we start tearing down things they're using.
-  Dbg::StopJdwp();
-  delete signal_catcher_;
+  // Finally delete the thread list.
+  delete thread_list_;
 
-  // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
-  {
-    ScopedTrace trace2("Delete thread list");
-    delete thread_list_;
-  }
   // Delete the JIT after thread list to ensure that there is no remaining threads which could be
   // accessing the instrumentation when we delete it.
   if (jit_ != nullptr) {
@@ -905,6 +909,7 @@
     std::unique_ptr<VdexFile> vdex_file(VdexFile::Open(vdex_filename,
                                                        false /* writable */,
                                                        false /* low_4gb */,
+                                                       false /* unquicken */,
                                                        &error_msg));
     if (vdex_file.get() == nullptr) {
       return false;
@@ -1962,12 +1967,23 @@
   }
 }
 
+void Runtime::ClearInstructionSet() {
+  instruction_set_ = InstructionSet::kNone;
+}
+
 void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) {
   DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType));
   CHECK(method != nullptr);
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
+void Runtime::ClearCalleeSaveMethods() {
+  for (size_t i = 0; i < static_cast<size_t>(kLastCalleeSaveType); ++i) {
+    CalleeSaveType type = static_cast<CalleeSaveType>(i);
+    callee_save_methods_[type] = reinterpret_cast<uintptr_t>(nullptr);
+  }
+}
+
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
                               const std::string& profile_output_filename) {
   if (jit_.get() == nullptr) {
@@ -2133,7 +2149,7 @@
 void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* argv)
     const {
   if (GetInstrumentation()->InterpretOnly()) {
-    argv->push_back("--compiler-filter=interpret-only");
+    argv->push_back("--compiler-filter=quicken");
   }
 
   // Make the dex2oat instruction set match that of the launching runtime. If we have multiple
diff --git a/runtime/runtime.h b/runtime/runtime.h
index df13b70..3ba0f2c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -356,6 +356,9 @@
   }
 
   void SetResolutionMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ClearResolutionMethod() {
+    resolution_method_ = nullptr;
+  }
 
   ArtMethod* CreateResolutionMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -367,6 +370,10 @@
     return imt_conflict_method_ != nullptr;
   }
 
+  void ClearImtConflictMethod() {
+    imt_conflict_method_ = nullptr;
+  }
+
   void FixupConflictTables();
   void SetImtConflictMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
   void SetImtUnimplementedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -374,6 +381,10 @@
   ArtMethod* CreateImtConflictMethod(LinearAlloc* linear_alloc)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void ClearImtUnimplementedMethod() {
+    imt_unimplemented_method_ = nullptr;
+  }
+
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
     kSaveAllCalleeSaves,  // All callee-save registers.
@@ -409,8 +420,10 @@
   }
 
   void SetInstructionSet(InstructionSet instruction_set);
+  void ClearInstructionSet();
 
   void SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type);
+  void ClearCalleeSaveMethods();
 
   ArtMethod* CreateCalleeSaveMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc
index abe99e0..640f9ce 100644
--- a/runtime/runtime_callbacks_test.cc
+++ b/runtime/runtime_callbacks_test.cc
@@ -335,6 +335,9 @@
 };
 
 TEST_F(RuntimeSigQuitCallbackRuntimeCallbacksTest, SigQuit) {
+  // SigQuit induces a dump. ASAN isn't happy with libunwind reading memory.
+  TEST_DISABLED_FOR_MEMORY_TOOL_ASAN();
+
   // The runtime needs to be started for the signal handler.
   Thread* self = Thread::Current();
 
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
index 0fc06e6..6ba4773 100644
--- a/runtime/string_reference.h
+++ b/runtime/string_reference.h
@@ -41,7 +41,7 @@
 
 // Compare only the reference and not the string contents.
 struct StringReferenceComparator {
-  bool operator()(const StringReference& a, const StringReference& b) {
+  bool operator()(const StringReference& a, const StringReference& b) const {
     if (a.dex_file != b.dex_file) {
       return a.dex_file < b.dex_file;
     }
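
The added const is not cosmetic: standard containers may invoke their comparator through a const object, most visibly when the container itself is const-qualified. A standalone illustration (hypothetical Key type):

#include <set>

struct Key { int file; int index; };
struct KeyCmp {
  // Without the trailing const, the find() below fails to compile: a const set
  // holds a const comparator.
  bool operator()(const Key& a, const Key& b) const {
    return a.file != b.file ? a.file < b.file : a.index < b.index;
  }
};

bool Contains(const std::set<Key, KeyCmp>& s, const Key& k) {
  return s.find(k) != s.end();
}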
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 201701a..653a9bd 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -35,6 +35,7 @@
 #include "android-base/stringprintf.h"
 
 #include "arch/context.h"
+#include "arch/context-inl.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/bit_utils.h"
@@ -128,12 +129,12 @@
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking);
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active);
 
 void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) {
   CHECK(kUseReadBarrier);
   tls32_.is_gc_marking = is_marking;
-  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active */ is_marking);
   ResetQuickAllocEntryPointsForThread(is_marking);
 }
 
@@ -3413,11 +3414,10 @@
     verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
-  Context* context = GetLongJumpContext();
+  RuntimeContextType context;
   RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, context, visitor_to_callback);
+  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, &context, visitor_to_callback);
   mapper.template WalkStack<StackVisitor::CountTransitions::kNo>(false);
-  ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
   }
@@ -3604,4 +3604,9 @@
   return peer;
 }
 
+void Thread::SetReadBarrierEntrypoints() {
+  // Make sure entrypoints aren't null.
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active */ true);
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 5251012..6abde5b 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1180,6 +1180,9 @@
     return false;
   }
 
+  // Set the read barrier marking entrypoints to be non-null.
+  void SetReadBarrierEntrypoints();
+
   static jobject CreateCompileTimePeer(JNIEnv* env,
                                        const char* name,
                                        bool as_daemon,
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 2e0d866..b63eaa4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -73,12 +73,17 @@
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
       long_suspend_(false),
+      shut_down_(false),
       thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
       empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
 ThreadList::~ThreadList() {
+  CHECK(shut_down_);
+}
+
+void ThreadList::ShutDown() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
   // We need to detach the current thread here in case there's another thread waiting to join with
@@ -102,6 +107,8 @@
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
   SuspendAllDaemonThreadsForShutdown();
+
+  shut_down_ = true;
 }
 
 bool ThreadList::Contains(Thread* thread) {
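
The destructor/ShutDown() split follows a common two-phase teardown pattern: work that touches other subsystems (detaching the current thread, waiting for daemons) moves into an explicit method, and the destructor merely asserts that it ran. In miniature (illustrative only, not ART code):

#include <cassert>

class TwoPhase {
 public:
  ~TwoPhase() { assert(shut_down_); }  // destroying without ShutDown() is a bug
  void ShutDown() {
    // ... stop and detach threads here ...
    shut_down_ = true;
  }
 private:
  bool shut_down_ = false;
};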
@@ -1362,6 +1369,7 @@
 
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
+  CHECK(!shut_down_);
 
   if (VLOG_IS_ON(threads)) {
     std::ostringstream oss;
@@ -1387,13 +1395,14 @@
   CHECK(!Contains(self));
   list_.push_back(self);
   if (kUseReadBarrier) {
+    gc::collector::ConcurrentCopying* const cc =
+        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
     // Initialize according to the state of the CC collector.
-    bool is_gc_marking =
-        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking();
-    self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking);
-    bool weak_ref_access_enabled =
-        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
-    self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
+    self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking());
+    if (cc->IsUsingReadBarrierEntrypoints()) {
+      self->SetReadBarrierEntrypoints();
+    }
+    self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled());
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 70917eb..14bef5e 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -50,6 +50,8 @@
   explicit ThreadList(uint64_t thread_suspend_timeout_ns);
   ~ThreadList();
 
+  void ShutDown();
+
   void DumpForSigQuit(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);
   // For thread suspend timeout dumps.
@@ -219,6 +221,10 @@
   // Whether or not the current thread suspension is long.
   bool long_suspend_;
 
+  // Whether the shutdown function has been called. This is checked in the destructor. It is an
+  // error to destroy a ThreadList instance without first calling ShutDown().
+  bool shut_down_;
+
   // Thread suspension timeout in nanoseconds.
   const uint64_t thread_suspend_timeout_ns_;
 
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 945f08b..e93f04d 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -21,8 +21,10 @@
 #include <memory>
 
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file.h"
+#include "dex_to_dex_decompiler.h"
 
 namespace art {
 
@@ -54,6 +56,7 @@
 std::unique_ptr<VdexFile> VdexFile::Open(const std::string& vdex_filename,
                                          bool writable,
                                          bool low_4gb,
+                                         bool unquicken,
                                          std::string* error_msg) {
   if (!OS::FileExists(vdex_filename.c_str())) {
     *error_msg = "File " + vdex_filename + " does not exist.";
@@ -78,7 +81,7 @@
     return nullptr;
   }
 
-  return Open(vdex_file->Fd(), vdex_length, vdex_filename, writable, low_4gb, error_msg);
+  return Open(vdex_file->Fd(), vdex_length, vdex_filename, writable, low_4gb, unquicken, error_msg);
 }
 
 std::unique_ptr<VdexFile> VdexFile::Open(int file_fd,
@@ -86,15 +89,17 @@
                                          const std::string& vdex_filename,
                                          bool writable,
                                          bool low_4gb,
+                                         bool unquicken,
                                          std::string* error_msg) {
-  std::unique_ptr<MemMap> mmap(MemMap::MapFile(vdex_length,
-                                               writable ? PROT_READ | PROT_WRITE : PROT_READ,
-                                               MAP_SHARED,
-                                               file_fd,
-                                               0 /* start offset */,
-                                               low_4gb,
-                                               vdex_filename.c_str(),
-                                               error_msg));
+  std::unique_ptr<MemMap> mmap(MemMap::MapFile(
+      vdex_length,
+      (writable || unquicken) ? PROT_READ | PROT_WRITE : PROT_READ,
+      unquicken ? MAP_PRIVATE : MAP_SHARED,
+      file_fd,
+      0 /* start offset */,
+      low_4gb,
+      vdex_filename.c_str(),
+      error_msg));
   if (mmap == nullptr) {
     *error_msg = "Failed to mmap file " + vdex_filename + " : " + *error_msg;
     return nullptr;
@@ -106,6 +111,16 @@
     return nullptr;
   }
 
+  if (unquicken) {
+    std::vector<std::unique_ptr<const DexFile>> unique_ptr_dex_files;
+    if (!vdex->OpenAllDexFiles(&unique_ptr_dex_files, error_msg)) {
+      return nullptr;
+    }
+    Unquicken(MakeNonOwningPointerVector(unique_ptr_dex_files), vdex->GetQuickeningInfo());
+    // Update the quickening info size to pretend there isn't any.
+    reinterpret_cast<Header*>(vdex->mmap_->Begin())->quickening_info_size_ = 0;
+  }
+
   *error_msg = "Success";
   return vdex;
 }
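
The mapping flags are the crux of this hunk: unquickening rewrites dex bytecode inside the mapped image, so the mapping must be writable, and MAP_PRIVATE keeps those writes in copy-on-write pages rather than letting them reach the vdex file. The same decision expressed directly against POSIX mmap(2), as a sketch:

#include <sys/mman.h>
#include <cstddef>

void* MapVdexForOpen(int fd, size_t length, bool writable, bool unquicken) {
  int prot = (writable || unquicken) ? (PROT_READ | PROT_WRITE) : PROT_READ;
  // MAP_PRIVATE: in-place unquickening stays in memory; the file is untouched.
  int flags = unquicken ? MAP_PRIVATE : MAP_SHARED;
  return mmap(nullptr, length, prot, flags, fd, /* offset */ 0);
}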
@@ -148,4 +163,62 @@
   return true;
 }
 
+void VdexFile::Unquicken(const std::vector<const DexFile*>& dex_files,
+                         const ArrayRef<const uint8_t>& quickening_info) {
+  if (quickening_info.size() == 0) {
+    // If there is no quickening info, bail early: the code below expects at least a
+    // size prefix of quickening data for every method that has a code item.
+    return;
+  }
+  const uint8_t* quickening_info_ptr = quickening_info.data();
+  const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size();
+  for (const DexFile* dex_file : dex_files) {
+    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        continue;
+      }
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      while (it.HasNextDirectMethod()) {
+        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
+        if (code_item != nullptr) {
+          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
+          quickening_info_ptr += sizeof(uint32_t);
+          optimizer::ArtDecompileDEX(*code_item,
+                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
+                                     /* decompile_return_instruction */ false);
+          quickening_info_ptr += quickening_size;
+        }
+        it.Next();
+      }
+
+      while (it.HasNextVirtualMethod()) {
+        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
+        if (code_item != nullptr) {
+          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
+          quickening_info_ptr += sizeof(uint32_t);
+          optimizer::ArtDecompileDEX(*code_item,
+                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
+                                     /* decompile_return_instruction */ false);
+          quickening_info_ptr += quickening_size;
+        }
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+  if (quickening_info_ptr != quickening_info_end) {
+    LOG(FATAL) << "Failed to use all quickening info";
+  }
+}
+
 }  // namespace art
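
The loop above implies a simple layout for the quickening info stream: walking the class data of every dex file in order, each direct or virtual method that has a code item contributes a uint32 length followed by that many bytes of data. A parsing sketch under that assumption (hypothetical helper; memcpy avoids the alignment assumption the cast in Unquicken makes):

#include <cstdint>
#include <cstring>
#include <vector>

struct QuickenChunk {
  uint32_t size;
  const uint8_t* data;
};

// Split [u4 size][size bytes]... into per-method chunks; ptr/end bound the stream.
std::vector<QuickenChunk> SplitQuickeningInfo(const uint8_t* ptr, const uint8_t* end) {
  std::vector<QuickenChunk> chunks;
  while (ptr != end) {
    uint32_t size;
    memcpy(&size, ptr, sizeof(size));
    ptr += sizeof(size);
    chunks.push_back(QuickenChunk{size, ptr});
    ptr += size;
  }
  return chunks;
}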
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 9840555..9c0d9db 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -71,6 +71,8 @@
     uint32_t dex_size_;
     uint32_t verifier_deps_size_;
     uint32_t quickening_info_size_;
+
+    friend class VdexFile;
   };
 
   typedef uint32_t VdexChecksum;
@@ -79,6 +81,7 @@
   static std::unique_ptr<VdexFile> Open(const std::string& vdex_filename,
                                         bool writable,
                                         bool low_4gb,
+                                        bool unquicken,
                                         std::string* error_msg);
 
   // Returns nullptr if the vdex file cannot be opened or is not valid.
@@ -87,6 +90,7 @@
                                         const std::string& vdex_filename,
                                         bool writable,
                                         bool low_4gb,
+                                        bool unquicken,
                                         std::string* error_msg);
 
   const uint8_t* Begin() const { return mmap_->Begin(); }
@@ -124,12 +128,14 @@
     return reinterpret_cast<const uint32_t*>(Begin() + sizeof(Header))[dex_file_index];
   }
 
-  // Opens all the dex files contained in this vdex file.  This is currently
-  // used for dumping tools only, and has not been tested for use by the
-  // remainder of the runtime.
+  // Opens all the dex files contained in this vdex file.
   bool OpenAllDexFiles(std::vector<std::unique_ptr<const DexFile>>* dex_files,
                        std::string* error_msg);
 
+  // Unquicken the given `dex_files` in place, based on `quickening_info`.
+  static void Unquicken(const std::vector<const DexFile*>& dex_files,
+                        const ArrayRef<const uint8_t>& quickening_info);
+
  private:
   explicit VdexFile(MemMap* mmap) : mmap_(mmap) {}
 
diff --git a/runtime/vdex_file_test.cc b/runtime/vdex_file_test.cc
index 909e117..ced6e28 100644
--- a/runtime/vdex_file_test.cc
+++ b/runtime/vdex_file_test.cc
@@ -36,10 +36,12 @@
                                                   tmp.GetFilename(),
                                                   /*writable*/false,
                                                   /*low_4gb*/false,
+                                                  /*unquicken*/false,
                                                   &error_msg);
   EXPECT_TRUE(vdex == nullptr);
 
-  vdex = VdexFile::Open(tmp.GetFilename(), /*writable*/false, /*low_4gb*/false, &error_msg);
+  vdex = VdexFile::Open(
+      tmp.GetFilename(), /*writable*/false, /*low_4gb*/false, /*unquicken*/ false, &error_msg);
   EXPECT_TRUE(vdex == nullptr);
 }
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2b0c612..cb9c605 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3764,7 +3764,7 @@
     const RegType& referrer = GetDeclaringClass();
     if (!referrer.IsUnresolvedTypes() && !referrer.CanAccess(*result)) {
       Fail(VERIFY_ERROR_ACCESS_CLASS) << "illegal class access: '"
-                                      << referrer << "' -> '" << result << "'";
+                                      << referrer << "' -> '" << *result << "'";
     }
   }
   return *result;
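
The one-character fix changes what gets logged: streaming the pointer prints an address, while streaming the dereferenced RegType prints its description. In miniature (hypothetical type with an operator<<):

#include <iostream>

struct RegTypeLike {
  friend std::ostream& operator<<(std::ostream& os, const RegTypeLike&) {
    return os << "Precise Reference: java.lang.Object";
  }
};

int main() {
  RegTypeLike t;
  const RegTypeLike* result = &t;
  std::cout << result << "\n";   // before the fix: prints something like 0x7ffc...
  std::cout << *result << "\n";  // after the fix: prints the type description
}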
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index 0e69a96..71167c1 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -94,9 +94,7 @@
             /* spin for a bit */
             long start, end;
             start = System.nanoTime();
-            for (int i = 0; i < 1000000; i++) {
-                j++;
-            }
+            snooze(2000);
             end = System.nanoTime();
             System.out.println("Finalizer done spinning.");
 
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 3236bde..2248fc4 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -56,7 +56,7 @@
 
     const OatFile* oat_file = oat_dex_file->GetOatFile();
     return !oat_file->IsPic()
-        && CompilerFilter::IsBytecodeCompilationEnabled(oat_file->GetCompilerFilter());
+        && CompilerFilter::IsAotCompilationEnabled(oat_file->GetCompilerFilter());
   }
 };
 
diff --git a/test/121-modifiers/info.txt b/test/121-modifiers/info.txt
index 129aee8..335df53 100644
--- a/test/121-modifiers/info.txt
+++ b/test/121-modifiers/info.txt
@@ -14,5 +14,5 @@
 mv NonInf.out classes/NonInf.class
 mv Main.class A.class A\$B.class A\$C.class classes/
 dx --debug --dex --output=classes.dex classes
-baksmali classes.dex
+baksmali disassemble classes.dex
 mv out/*.smali smali/
diff --git a/test/157-void-class/run b/test/157-void-class/run
index 59e852c..8c6159f 100755
--- a/test/157-void-class/run
+++ b/test/157-void-class/run
@@ -14,9 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Let the test build its own core image with --no-image and use verify-profile,
+# Let the test build its own core image with --no-image and use verify,
 # so that the compiler does not try to initialize classes. This leaves the
 # java.lang.Void compile-time verified but uninitialized.
 ./default-run "$@" --no-image \
     --runtime-option -Ximage-compiler-option \
-    --runtime-option --compiler-filter=verify-profile
+    --runtime-option --compiler-filter=verify
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 330aa74..a538f52 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -17,8 +17,8 @@
 // TODO: Add more tests after we can inline functions with calls.
 
 class ClassWithoutFinals {
-  /// CHECK-START: void ClassWithoutFinals.<init>() register (after)
-  /// CHECK-NOT: MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithoutFinals.<init>() inliner (after)
+  /// CHECK-NOT: ConstructorFence
   public ClassWithoutFinals() {}
 }
 
@@ -33,17 +33,40 @@
     // should not inline this constructor
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
+
+  /*
+   * Check that the correct assembly instructions are selected for a Store/Store fence.
+   *
+   * - ARM variants:   DMB ISHST (store-store fence for inner shareable domain)
+   * - Intel variants: no-op (store-store does not need a fence).
+   */
+
+  /// CHECK-START-ARM64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-ARM: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-X86_64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
+
+  /// CHECK-START-X86: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
   public ClassWithFinals() {
     // Exactly one constructor barrier.
     x = 0;
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public ClassWithFinals(int x) {
     // This should have exactly two barriers:
@@ -55,11 +78,11 @@
 }
 
 class InheritFromClassWithFinals extends ClassWithFinals {
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals() {
     // Should inline the super constructor.
@@ -67,23 +90,23 @@
     // Exactly one constructor barrier here.
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public InheritFromClassWithFinals(boolean cond) {
     super(cond);
     // should not inline the super constructor
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  ConstructorFence
   /// CHECK:      ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals(int unused) {
     // Should inline the super constructor and insert a memory barrier.
@@ -96,21 +119,21 @@
 class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals {
   final int y;
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT: InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals() {
     // Should inline the super constructor and keep the memory barrier.
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public HaveFinalsAndInheritFromClassWithFinals(boolean cond) {
     super(cond);
@@ -118,15 +141,15 @@
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals(int unused) {
+    // Should inline the super constructor and keep both memory barriers.
@@ -141,55 +164,55 @@
 
 public class Main {
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public static ClassWithFinals noInlineNoConstructorBarrier() {
     return new ClassWithFinals(false);
     // should not inline the constructor
   }
 
-  /// CHECK-START: void Main.inlineNew() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew() register (after)
+  /// CHECK-START: void Main.inlineNew() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew() {
     new ClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew1() {
     new InheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew2() {
     new HaveFinalsAndInheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew3() {
     new HaveFinalsAndInheritFromClassWithFinals();
diff --git a/test/530-checker-lse-ctor-fences/expected.txt b/test/530-checker-lse-ctor-fences/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/expected.txt
diff --git a/test/530-checker-lse-ctor-fences/info.txt b/test/530-checker-lse-ctor-fences/info.txt
new file mode 100644
index 0000000..ccc7b47
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/info.txt
@@ -0,0 +1 @@
+Checker test for load-store elimination with final fields (constructor fences).
diff --git a/test/530-checker-lse-ctor-fences/src/Main.java b/test/530-checker-lse-ctor-fences/src/Main.java
new file mode 100644
index 0000000..7755875
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/src/Main.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This base class has a single final field;
+// the constructor should have one fence.
+class Circle {
+  Circle(double radius) {
+    this.radius = radius;
+  }
+  public double getRadius() {
+    return radius;
+  }
+  public double getArea() {
+    return radius * radius * Math.PI;
+  }
+
+  public double getCircumference() {
+    return 2 * Math.PI * radius;
+  }
+
+  private final double radius;
+}
+
+// This subclass adds an extra final field;
+// there should be an extra constructor fence added
+// (for a total of 2 after inlining).
+class Ellipse extends Circle {
+  Ellipse(double vertex, double covertex) {
+    super(vertex);
+
+    this.covertex = covertex;
+  }
+
+  public double getVertex() {
+    return getRadius();
+  }
+
+  public double getCovertex() {
+    return covertex;
+  }
+
+  @Override
+  public double getArea() {
+    return getRadius() * covertex * Math.PI;
+  }
+
+  private final double covertex;
+}
+
+class CalcCircleAreaOrCircumference {
+  public static final int TYPE_AREA = 0;
+  public static final int TYPE_CIRCUMFERENCE = 1;
+
+  double value;
+
+  public CalcCircleAreaOrCircumference(int type) {
+    this.type = type;
+  }
+
+  final int type;
+}
+
+public class Main {
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Make sure the constructor fence gets eliminated when the allocation is eliminated.
+  static double calcCircleArea(double radius) {
+    return new Circle(radius).getArea();
+  }
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Multiple constructor fences can accumulate through inheritance; make sure
+  // they are all eliminated when the allocation is eliminated.
+  static double calcEllipseArea(double vertex, double covertex) {
+    return new Ellipse(vertex, covertex).getArea();
+  }
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: ConstructorFence
+
+  // The object allocation will not be eliminated by LSE because of aliased stores.
+  // However, the object is still a singleton, so it never escapes the current thread.
+  // There should not be a constructor fence here after LSE.
+  static double calcCircleAreaOrCircumference(double radius, boolean area_or_circumference) {
+    CalcCircleAreaOrCircumference calc =
+      new CalcCircleAreaOrCircumference(
+          area_or_circumference ? CalcCircleAreaOrCircumference.TYPE_AREA :
+          CalcCircleAreaOrCircumference.TYPE_CIRCUMFERENCE);
+
+    if (area_or_circumference) {
+      // Area
+      calc.value = Math.PI * Math.PI * radius;
+    } else {
+      // Circumference
+      calc.value = 2 * Math.PI * radius;
+    }
+
+    return calc.value;
+  }
+
+  /// CHECK-START: Circle Main.makeCircle(double) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: ConstructorFence
+
+  // The object allocation is considered a singleton by LSE,
+  // but we cannot eliminate the new because it is returned.
+  //
+  // The constructor fence must also not be removed because the object could escape the
+  // current thread (in the caller).
+  static Circle makeCircle(double radius) {
+    return new Circle(radius);
+  }
+
+  static void assertIntEquals(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertFloatEquals(float result, float expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertDoubleEquals(double result, double expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertInstanceOf(Object result, Class<?> expected) {
+    if (result.getClass() != expected) {
+      throw new Error("Expected type: " + expected + ", found : " + result.getClass());
+    }
+  }
+
+  public static void main(String[] args) {
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI));
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcEllipseArea(Math.PI, Math.PI));
+    assertDoubleEquals(2 * Math.PI * Math.PI, calcCircleAreaOrCircumference(Math.PI, false));
+    assertInstanceOf(makeCircle(Math.PI), Circle.class);
+  }
+
+  static boolean sFlag;
+}
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
index 0fe3d87..491a9a1 100644
--- a/test/530-checker-lse2/src/Main.java
+++ b/test/530-checker-lse2/src/Main.java
@@ -76,16 +76,27 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
@@ -95,9 +106,14 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: ConstructorFence
 
   private float testMethod() {
     {
+      // Each of the "new" statements here will initialize an object with final fields,
+      // which after inlining will also retain a constructor fence.
+      //
+      // After LSE we remove the 'new-instance' and the associated constructor fence.
       int lI0 = (-1456058746 << mI);
       mD = ((double)(int)(double) mD);
       for (int i0 = 56 - 1; i0 >= 0; i0--) {
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
index a78021f..027a0ea 100644
--- a/test/551-checker-shifter-operand/build
+++ b/test/551-checker-shifter-operand/build
@@ -168,7 +168,7 @@
 
 if [ "${HAS_SMALI}" = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 345e9fd..26d87b1 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -331,7 +331,7 @@
 
   /// CHECK-START: double Main.constructBase() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBase() {
@@ -347,7 +347,7 @@
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -371,7 +371,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0() {
@@ -387,7 +387,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
@@ -411,7 +411,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -431,7 +431,7 @@
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -460,7 +460,7 @@
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -493,7 +493,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -543,7 +543,7 @@
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -567,7 +567,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0d() {
@@ -605,7 +605,7 @@
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -628,7 +628,7 @@
 
   /// CHECK-START: double Main.constructDerived() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerived() {
@@ -644,7 +644,7 @@
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -668,7 +668,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0() {
@@ -684,7 +684,7 @@
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -702,7 +702,7 @@
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -726,7 +726,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0d() {
@@ -744,7 +744,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -794,7 +794,7 @@
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-DAG:  <<Value:f\d+>>      ParameterValue
@@ -821,7 +821,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -852,7 +852,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalField() {
@@ -873,7 +873,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -892,7 +892,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalFieldWith0() {
@@ -907,7 +907,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalField() {
@@ -928,7 +928,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -947,7 +947,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0() {
@@ -968,7 +968,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -987,7 +987,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0d() {
@@ -1009,7 +1009,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -1017,8 +1017,8 @@
   /// CHECK-NOT:                      InstanceFieldSet
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
-  /// CHECK-DAG:                      MemoryBarrier
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
+  /// CHECK-NOT:                      ConstructorFence
 
   public static double constructDerivedWithFinalField(int intValue, double doubleValue) {
     DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue);
@@ -1034,7 +1034,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0And0d() {
@@ -1049,7 +1049,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex() {
@@ -1070,7 +1070,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(int intValue) {
@@ -1091,7 +1091,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDexWith0() {
@@ -1107,7 +1107,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(long dummy) {
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
index 068ad03..fce6ac1 100644
--- a/test/595-profile-saving/run
+++ b/test/595-profile-saving/run
@@ -15,13 +15,13 @@
 # limitations under the License.
 
 # Use
-# --compiler-filter=interpret-only to make sure that the test is not compiled AOT
+# --compiler-filter=quicken to make sure that the test is not compiled AOT
 # and to make sure the test is not compiled when loaded (by PathClassLoader)
 # -Xjitsaveprofilinginfo to enable profile saving
 # -Xusejit:false to disable jit and only test profiles.
 exec ${RUN} \
-  -Xcompiler-option --compiler-filter=interpret-only \
-  --runtime-option '-Xcompiler-option --compiler-filter=interpret-only' \
+  -Xcompiler-option --compiler-filter=quicken \
+  --runtime-option '-Xcompiler-option --compiler-filter=quicken' \
   --runtime-option -Xjitsaveprofilinginfo \
   --runtime-option -Xusejit:false \
   "${@}"
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 2b30986..520e7c3 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -280,7 +280,17 @@
     }
   }
 
-  // If vectorized, string encoding should be dealt with.
+  /// CHECK-START: void Main.string2Bytes(char[], java.lang.String) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.string2Bytes(char[], java.lang.String) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  // NOTE: should correctly deal with compressed and uncompressed cases.
   private static void string2Bytes(char[] a, String b) {
     int min = Math.min(a.length, b.length());
     for (int i = 0; i < min; i++) {
@@ -310,6 +320,37 @@
     }
   }
 
+  /// CHECK-START: void Main.oneBoth(short[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<One>>] loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<One>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.oneBoth(short[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Bug b/37764324: integral same-length packed types can be mixed freely.
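+  // Since short and char are both 16-bit integral types, the single
+  // VecReplicateScalar checked above can feed the VecStore for either array.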
+  private static void oneBoth(short[] a, char[] b) {
+    for (int i = 0; i < Math.min(a.length, b.length); i++) {
+      a[i] = 1;
+      b[i] = 1;
+    }
+  }
+
+  // Bug b/37768917: potential dynamic BCE vs. loop optimizations
+  // case should be dealt with correctly (used to DCHECK fail).
+  private static void arrayInTripCount(int[] a, byte[] b, int n) {
+    for (int k = 0; k < n; k++) {
+      for (int i = 0, u = a[0]; i < u; i++) {
+        b[i] += 2;
+      }
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -390,9 +431,28 @@
     for (int i = 0; i < aa.length; i++) {
       expectEquals(aa[i], bb.charAt(i));
     }
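+    // cc contains chars that cannot live in a compressed (8-bit) String, so
+    // this pass also exercises the uncompressed case noted at string2Bytes.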
+    String cc = "\u1010\u2020llo world how are y\u3030\u4040";
+    string2Bytes(aa, cc);
+    for (int i = 0; i < aa.length; i++) {
+      expectEquals(aa[i], cc.charAt(i));
+    }
 
     envUsesInCond();
 
+    short[] dd = new short[23];
+    oneBoth(dd, aa);
+    for (int i = 0; i < aa.length; i++) {
+      expectEquals(aa[i], 1);
+      expectEquals(dd[i], 1);
+    }
+
+    xx[0] = 10;
+    byte[] bt = new byte[10];
+    arrayInTripCount(xx, bt, 20);
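+    // a[0] == 10 bounds the inner loop, so each of the 20 outer iterations
+    // adds 2 to b[0..9], leaving every element at 40.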
+    for (int i = 0; i < bt.length; i++) {
+      expectEquals(40, bt[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/628-vdex/run b/test/628-vdex/run
index 4cbcea3..bf0ac91 100644
--- a/test/628-vdex/run
+++ b/test/628-vdex/run
@@ -14,4 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} -Xcompiler-option --compiler-filter=verify-profile --vdex "${@}"
+exec ${RUN} -Xcompiler-option --compiler-filter=verify --vdex "${@}"
diff --git a/test/634-vdex-duplicate/run b/test/634-vdex-duplicate/run
index 1ccb841..571ccd9 100644
--- a/test/634-vdex-duplicate/run
+++ b/test/634-vdex-duplicate/run
@@ -14,4 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} -Xcompiler-option --compiler-filter=verify-profile --vdex-filter speed --vdex "${@}"
+exec ${RUN} -Xcompiler-option --compiler-filter=verify --vdex-filter speed --vdex "${@}"
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 0f7452b..10b20b8 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -179,6 +179,11 @@
       a[i] >>>= 33;  // 1, since & 31
   }
 
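+  // A short arithmetic note for the loop below: each byte is promoted to
+  // int, <<= 9 shifts every original bit out of the low byte, and the
+  // narrowing store keeps only those low 8 bits, e.g.
+  //   (byte) 0xf0  ->  0xfffffff0 << 9 == 0xffffe000  ->  (byte) 0x00.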
+  static void shl9() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 9;  // yields all-zeros
+  }
+
   //
   // Loop bounds.
   //
@@ -259,6 +264,10 @@
     shr33();
     for (int i = 0; i < 128; i++) {
       expectEquals((byte) 0x09, a[i], "shr33");
+    }
+    shl9();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0x00, a[i], "shl9");
       a[i] = (byte) 0xf0;  // reset
     }
     not();
diff --git a/test/648-inline-caches-unresolved/expected.txt b/test/648-inline-caches-unresolved/expected.txt
new file mode 100644
index 0000000..4e6a438
--- /dev/null
+++ b/test/648-inline-caches-unresolved/expected.txt
@@ -0,0 +1 @@
+Subclass
diff --git a/test/648-inline-caches-unresolved/info.txt b/test/648-inline-caches-unresolved/info.txt
new file mode 100644
index 0000000..8fc6042
--- /dev/null
+++ b/test/648-inline-caches-unresolved/info.txt
@@ -0,0 +1 @@
+Test for inlining with inline cache into an unresolved method.
diff --git a/test/648-inline-caches-unresolved/profile b/test/648-inline-caches-unresolved/profile
new file mode 100644
index 0000000..92c0a41
--- /dev/null
+++ b/test/648-inline-caches-unresolved/profile
@@ -0,0 +1 @@
+LMain;->inlineMonomorphicUnresolvedSuper(Ljava/lang/Object;)Ljava/lang/String;+LSubclass;
diff --git a/test/648-inline-caches-unresolved/run b/test/648-inline-caches-unresolved/run
new file mode 100644
index 0000000..fb70d22
--- /dev/null
+++ b/test/648-inline-caches-unresolved/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} "${@}" --secondary --profile
diff --git a/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java
new file mode 100644
index 0000000..dd3be006
--- /dev/null
+++ b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UnresolvedSuperClass {
+  public void superMethod() {
+    System.out.println("UnresolvedSuperClass.superMethod()");
+  }
+}
diff --git a/test/648-inline-caches-unresolved/src/Main.java b/test/648-inline-caches-unresolved/src/Main.java
new file mode 100644
index 0000000..4e8aeec
--- /dev/null
+++ b/test/648-inline-caches-unresolved/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main extends UnresolvedSuperClass {
+  public static String inlineMonomorphicUnresolvedSuper(Object o) {
+    return o.toString();
+  }
+
+  public static void main(String[] args) {
+    System.out.println(inlineMonomorphicUnresolvedSuper(new Subclass()));
+  }
+}
+
+class Subclass {
+  public String toString() {
+    return "Subclass";
+  }
+}
diff --git a/test/649-vdex-duplicate-method/classes.dex b/test/649-vdex-duplicate-method/classes.dex
new file mode 100644
index 0000000..8036a2f
--- /dev/null
+++ b/test/649-vdex-duplicate-method/classes.dex
Binary files differ
diff --git a/test/649-vdex-duplicate-method/expected.txt b/test/649-vdex-duplicate-method/expected.txt
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/test/649-vdex-duplicate-method/expected.txt
@@ -0,0 +1 @@
+0
diff --git a/test/649-vdex-duplicate-method/info.txt b/test/649-vdex-duplicate-method/info.txt
new file mode 100644
index 0000000..d2c9959
--- /dev/null
+++ b/test/649-vdex-duplicate-method/info.txt
@@ -0,0 +1 @@
+Regression test for unquickening a vdex that has duplicate methods.
diff --git a/test/911-get-stack-trace/src/art/PrintThread.java b/test/911-get-stack-trace/src/art/PrintThread.java
index f50a66b..fee5ba0 100644
--- a/test/911-get-stack-trace/src/art/PrintThread.java
+++ b/test/911-get-stack-trace/src/art/PrintThread.java
@@ -41,7 +41,8 @@
   // We have to ignore some threads when printing all stack traces. These are threads that may or
   // may not exist depending on the environment.
   public final static String IGNORE_THREAD_NAME_REGEX =
-      "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main";
+      "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main|" +
+      "queued-work-looper";
   public final static Matcher IGNORE_THREADS =
       Pattern.compile(IGNORE_THREAD_NAME_REGEX).matcher("");
 
@@ -88,4 +89,4 @@
   }
 
   public static native String[][] getStackTrace(Thread thread, int start, int max);
-}
\ No newline at end of file
+}
diff --git a/test/912-classes/src/art/Test912Art.java b/test/912-classes/src/art/Test912Art.java
index 6da3cad..a1e7ff2 100644
--- a/test/912-classes/src/art/Test912Art.java
+++ b/test/912-classes/src/art/Test912Art.java
@@ -39,7 +39,7 @@
     //       run in configurations where dex2oat didn't verify the class itself. So explicitly
     //       check whether the class has been already loaded, and skip then.
     // TODO: Add multiple configurations to the run script once that becomes easier to do.
-    if (hasJit() && !isLoadedClass("art.Test912Art$ClassD")) {
+    if (hasJit() && !isLoadedClass("Lart/Test912Art$ClassD;")) {
       testClassEventsJit();
     }
   }
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 702b247..b128d1c 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -385,3 +385,10 @@
 5@1002 --(field@10)--> 1@1000 [size=16, length=-1]
 5@1002 --(field@9)--> 6@1000 [size=16, length=-1]
 ---
+
+default
+image
+zygote
+app
+
+3
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index e319f7d..f39c5f1 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -817,5 +817,192 @@
   return result;
 }
 
+using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...);
+static GetObjectHeapId gGetObjectHeapIdFn = nullptr;
+
+using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...);
+static GetHeapName gGetHeapNameFn = nullptr;
+
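+// Everything in jvmtiExtensionFunctionInfo (ids, descriptions, params and
+// error lists) is allocated by the JVMTI implementation, so each piece must
+// be released with Deallocate once the extension list has been inspected.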
+static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) {
+  for (size_t i = 0; i != static_cast<size_t>(count); ++i) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description));
+    for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name));
+    }
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors));
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  jint extension_count;
+  jvmtiExtensionFunctionInfo* extensions;
+  jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return;
+  }
+
+  for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) {
+    if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) {
+      CHECK(gGetObjectHeapIdFn == nullptr);
+      gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_OUT);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_NOT_FOUND);
+
+      continue;
+    }
+
+    if (strcmp("com.android.art.heap.get_heap_name", extensions[i].id) == 0) {
+      CHECK(gGetHeapNameFn == nullptr);
+      gGetHeapNameFn = reinterpret_cast<GetHeapName>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_name", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_CCHAR);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_ALLOC_BUF);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT);
+    }
+  }
+
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  CHECK(gGetHeapNameFn != nullptr);
+
+  FreeExtensionFunctionInfo(extensions, extension_count);
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getObjectHeapId(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  jint heap_id = -1;  // Initialized so an error path cannot return garbage.
+  jvmtiError result = gGetObjectHeapIdFn(jvmti_env, tag, &heap_id);
+  JvmtiErrorToException(env, jvmti_env, result);
+  return heap_id;
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_art_Test913_getHeapName(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jint heap_id) {
+  CHECK(gGetHeapNameFn != nullptr);
+  char* heap_name;
+  jvmtiError result = gGetHeapNameFn(jvmti_env, heap_id, &heap_name);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return nullptr;
+  }
+  jstring ret = env->NewStringUTF(heap_name);
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(heap_name));
+  return ret;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallback(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag, jint heap_id) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL FollowReferencesCallback(
+          jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+          const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+          jlong class_tag ATTRIBUTE_UNUSED,
+          jlong referrer_class_tag ATTRIBUTE_UNUSED,
+          jlong size ATTRIBUTE_UNUSED,
+          jlong* tag_ptr,
+          jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+          jint length ATTRIBUTE_UNUSED,
+          void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_reference_callback = GetObjectHeapIdCallbacks::FollowReferencesCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                                jlong size ATTRIBUTE_UNUSED,
+                                                jlong* tag_ptr,
+                                                jint length ATTRIBUTE_UNUSED,
+                                                void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return 0;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_iteration_callback = GetObjectHeapIdCallbacks::HeapIterationCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index 8800b1a..6694aad 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -16,6 +16,9 @@
 
 package art;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -44,6 +47,8 @@
     };
     t.start();
     cdl1.await();
+
+    doExtensionTests();
   }
 
   public static void runFollowReferences() throws Exception {
@@ -215,6 +220,50 @@
     System.out.println(getTag(floatObject));
   }
 
+  static ArrayList<Object> extensionTestHolder;
+
+  private static void doExtensionTests() {
+    checkForExtensionApis();
+
+    extensionTestHolder = new ArrayList<>();
+    System.out.println();
+
+    try {
+      getHeapName(-1);
+      System.out.println("Expected failure for -1");
+    } catch (Exception e) {
+    }
+    System.out.println(getHeapName(0));
+    System.out.println(getHeapName(1));
+    System.out.println(getHeapName(2));
+    System.out.println(getHeapName(3));
+    try {
+      getHeapName(4);
+      System.out.println("Expected failure for 4");
+    } catch (Exception e) {
+    }
+
+    System.out.println();
+
+    setTag(Object.class, 100000);
+    int objectClassHeapId = getObjectHeapId(100000);
+    int objClassExpectedHeapId = hasImage() ? 1 : 3;
+    if (objectClassHeapId != objClassExpectedHeapId) {
+      throw new RuntimeException("Expected object class in heap " + objClassExpectedHeapId +
+          " but received " + objectClassHeapId);
+    }
+
+    A a = new A();
+    extensionTestHolder.add(a);
+    setTag(a, 100001);
+    System.out.println(getObjectHeapId(100001));
+
+    checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId);
+    checkGetObjectHeapIdInCallback(100001, 3);
+
+    extensionTestHolder = null;
+  }
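+  // For reference, the heap ids probed above map to the names printed in
+  // expected.txt: 0 -> default, 1 -> image, 2 -> zygote, 3 -> app; ids
+  // outside that range are rejected, which the try/catch blocks verify.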
+
   private static void runGc() {
     clearStats();
     forceGarbageCollection();
@@ -233,6 +282,24 @@
     System.out.println((s > 0) + " " + (f > 0));
   }
 
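+  // Scans /proc/<pid>/maps for a mapped .art file; finding one means a boot
+  // image is in use, so Object.class should live in the image heap.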
+  private static boolean hasImage() {
+    try {
+      int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName());
+      BufferedReader reader = new BufferedReader(new FileReader("/proc/" + pid + "/maps"));
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.endsWith(".art")) {
+          reader.close();
+          return true;
+        }
+      }
+      reader.close();
+      return false;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   private static class TestConfig {
     private Class<?> klass = null;
     private int heapFilter = 0;
@@ -642,6 +709,11 @@
   private static native int getGcFinishes();
   private static native void forceGarbageCollection();
 
+  private static native void checkForExtensionApis();
+  private static native int getObjectHeapId(long tag);
+  private static native String getHeapName(int heapId);
+  private static native void checkGetObjectHeapIdInCallback(long tag, int heapId);
+
   public static native String[] followReferences(int heapFilter, Class<?> klassFilter,
       Object initialObject, int stopAfter, int followSet, Object jniRef);
   public static native String[] followReferencesString(Object initialObject);
diff --git a/test/etc/default-build b/test/etc/default-build
index 744c38b..0508b85 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -82,9 +82,9 @@
 JACK_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 
 declare -A SMALI_EXPERIMENTAL_ARGS
-SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
-SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api-level 26"
-SMALI_EXPERIMENTAL_ARGS["agents"]="--api-level 26"
+SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api 24"
+SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api 26"
+SMALI_EXPERIMENTAL_ARGS["agents"]="--api 26"
 
 declare -A JAVAC_EXPERIMENTAL_ARGS
 JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
@@ -275,7 +275,7 @@
 
 if [ "${HAS_SMALI}" = "true" -a ${NEED_DEX} = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
@@ -287,7 +287,7 @@
 
 if [ "${HAS_SMALI_MULTIDEX}" = "true" -a ${NEED_DEX} = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index b34da08..f750556 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -469,15 +469,15 @@
 if [ "$INTERPRETER" = "y" ]; then
     INT_OPTS="-Xint"
     if [ "$VERIFY" = "y" ] ; then
-      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only"
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=quicken"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=quicken"
     elif [ "$VERIFY" = "s" ]; then
-      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=extract"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=extract"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail"
     else # VERIFY = "n"
-      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=assume-verified"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=assume-verified"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
 fi
@@ -485,11 +485,11 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
-      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only"
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=quicken"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=quicken"
     else
-      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
-      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=assume-verified"
+      COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=assume-verified"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
 fi
@@ -564,6 +564,11 @@
   profman_cmdline="${ANDROID_ROOT}/bin/profman  \
     --apk=$DEX_LOCATION/$TEST_NAME.jar \
     --dex-location=$DEX_LOCATION/$TEST_NAME.jar"
+  if [ -f $DEX_LOCATION/$TEST_NAME-ex.jar ]; then
+    profman_cmdline="${profman_cmdline} \
+      --apk=$DEX_LOCATION/$TEST_NAME-ex.jar \
+      --dex-location=$DEX_LOCATION/$TEST_NAME-ex.jar"
+  fi
   COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
   FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
   if [ "$PROFILE" = "y" ]; then
@@ -582,7 +587,11 @@
     app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
   fi
 
-  dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
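+  # Default to the debug compiler; ndebug test runs switch to the release dex2oat below.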
+  dex2oat_binary=dex2oatd
+  if [[ "$TEST_IS_NDEBUG" = "y" ]]; then
+    dex2oat_binary=dex2oat
+  fi
+  dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/$dex2oat_binary \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
@@ -603,7 +612,9 @@
     # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
     dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
   fi
-  if [ "$TEST_VDEX" = "y" ]; then
+  if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
+    vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
+  elif [ "$TEST_VDEX" = "y" ]; then
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
   fi
 fi
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 3ca67bf..0a7089a 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -108,20 +108,19 @@
                         "non-deterministic. Same for 913."]
     },
     {
-        "tests": "961-default-iface-resolution-gen",
+        "tests": ["961-default-iface-resolution-gen",
+                  "964-default-iface-init-gen",
+                  "968-default-partial-compile-gen"],
         "variant": "gcstress",
-        "description": ["961-default-iface-resolution-gen and",
-                        "964-default-iface-init-genare very long tests that",
+        "description": ["961-default-iface-resolution-gen,",
+                        "968-default-partial-compile-gen and",
+                        "964-default-iface-init-gen are very long tests that",
                         "often will take more than the timeout to run when",
                         "gcstress is enabled. This is because gcstress slows",
                         "down allocations significantly which these tests do a",
                         "lot."]
     },
     {
-        "tests": "964-default-iface-init-gen",
-        "variant": "gcstress"
-    },
-    {
         "tests": "154-gc-loop",
         "variant": "gcstress | jit & debug",
         "description": ["154-gc-loop depends GC not happening too often"],
@@ -329,11 +328,6 @@
         "variant": "interpreter | optimizing | regalloc_gc | jit"
     },
     {
-        "tests": ["912-classes"],
-        "bug": "http://b/36344364",
-        "variant": "no-dex2oat | relocate-npatchoat"
-    },
-    {
         "tests": ["476-clinit-inline-static-invoke",
                   "496-checker-inlining-class-loader",
                   "508-referrer-method",
@@ -659,10 +653,29 @@
             "969-iface-super",
             "981-dedup-original-dex",
             "984-obsolete-invoke",
-            "985-re-obsolete"
+            "985-re-obsolete",
+            "987-stack-trace-dumping"
         ],
-        "description": "Tests that use javac/dx to build (not jack).",
+        "description": "The tests above fail with --build-with-javac-dx.",
         "env_vars": {"ANDROID_COMPILE_WITH_JACK": "false"},
         "bug": "b/37636792"
+    },
+    {
+        "tests": [
+            "536-checker-needs-access-check",
+            "537-checker-inline-and-unverified",
+            "569-checker-pattern-replacement",
+            "586-checker-null-array-get"
+        ],
+        "description": [
+            "Tests that have verify-at-runtime classes, but being compiled when using vdex."
+        ],
+        "variant": "speed-profile"
+    },
+    {
+        "tests": "648-many-direct-methods",
+        "variant": "debug",
+        "description": "Test disabled in debug mode because of dex2oatd timeouts.",
+        "bug": "b/33650497"
     }
 ]
diff --git a/test/run-test b/test/run-test
index f60f766..933a7fe 100755
--- a/test/run-test
+++ b/test/run-test
@@ -46,7 +46,7 @@
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 export USE_JACK="true"
-export SMALI_ARGS="--experimental"
+export SMALI_ARGS=""
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
diff --git a/tools/art b/tools/art
index 933ad7a..0bc08f0 100644
--- a/tools/art
+++ b/tools/art
@@ -46,17 +46,17 @@
   fi
 }
 
-function replace_compiler_filter_with_interepret_only() {
-  ARGS_WITH_INTERPRET_ONLY=("$@")
+function replace_compiler_filter_with_quicken() {
+  ARGS_WITH_QUICKEN=("$@")
 
   found="false"
   ((index=0))
   while ((index <= $#)); do
-    what="${ARGS_WITH_INTERPRET_ONLY[$index]}"
+    what="${ARGS_WITH_QUICKEN[$index]}"
 
     case "$what" in
       --compiler-filter=*)
-        ARGS_WITH_INTERPRET_ONLY[$index]="--compiler-filter=interpret-only"
+        ARGS_WITH_QUICKEN[$index]="--compiler-filter=quicken"
         found="true"
         ;;
     esac
@@ -65,7 +65,7 @@
     shift
   done
   if [ "$found" != "true" ]; then
-    ARGS_WITH_INTERPRET_ONLY=(-Xcompiler-option --compiler-filter=interpret-only "${ARGS_WITH_INTERPRET_ONLY[@]}")
+    ARGS_WITH_QUICKEN=(-Xcompiler-option --compiler-filter=quicken "${ARGS_WITH_QUICKEN[@]}")
   fi
 }
 
@@ -224,10 +224,10 @@
   PROFILE_PATH="$ANDROID_DATA/primary.prof"
   touch $PROFILE_PATH
 
-  # Replace the compiler filter with interpret-only so that we
+  # Replace the compiler filter with quicken so that we
   # can capture the profile.
-  ARGS_WITH_INTERPRET_ONLY=
-  replace_compiler_filter_with_interepret_only "$@"
+  ARGS_WITH_QUICKEN=
+  replace_compiler_filter_with_quicken "$@"
 
   run_art -Xjitsaveprofilinginfo               \
           -Xps-min-methods-to-save:1           \
@@ -235,7 +235,7 @@
           -Xps-min-notification-before-wake:10 \
           -Xps-profile-path:$PROFILE_PATH      \
           -Xusejit:true                        \
-          "${ARGS_WITH_INTERPRET_ONLY[@]}"     \
+          "${ARGS_WITH_QUICKEN[@]}"            \
           "&>" "$ANDROID_DATA/profile_gen.log"
   EXIT_STATUS=$?
 
diff --git a/tools/cpp-define-generator/constant_card_table.def b/tools/cpp-define-generator/constant_card_table.def
new file mode 100644
index 0000000..ae3e8f3
--- /dev/null
+++ b/tools/cpp-define-generator/constant_card_table.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export card table values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/accounting/card_table.h"
+#endif
+
+// Shift to convert a heap address into an index into the card table
+// (each card covers 2^kCardShift bytes).
+DEFINE_EXPR(CARD_TABLE_CARD_SHIFT, size_t, art::gc::accounting::CardTable::kCardShift)
+
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
index 13371a1..b8947de 100644
--- a/tools/cpp-define-generator/offsets_all.def
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -49,6 +49,7 @@
 // TODO: MIRROR_STRING offsets (depends on header size)
 #include "offset_dexcache.def"
 #include "constant_dexcache.def"
+#include "constant_card_table.def"
 #include "constant_heap.def"
 #include "constant_lockword.def"
 #include "constant_globals.def"
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Executor.java b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
index 2bcf3a1..074672d 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Executor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
@@ -117,7 +117,7 @@
     commandBuilder.append("--runtime-arg -classpath ");
     commandBuilder.append("--runtime-arg ").append(programName).append(" ");
     commandBuilder.append("--dex-file=").append(programName).append(" ");
-    commandBuilder.append("--compiler-filter=interpret-only --runtime-arg -Xnorelocate ");
+    commandBuilder.append("--compiler-filter=quicken --runtime-arg -Xnorelocate ");
 
     ExecutionResult verificationResult = device.executeCommand(commandBuilder.toString(), true,
         outputConsumer, errorConsumer);
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 07c300e..d48d857 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -132,8 +132,8 @@
   vm_args="--vm-arg $image"
 fi
 if $use_jit; then
-  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
-  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only"
+  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken"
+  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=quicken"
 fi
 vm_args="$vm_args --vm-arg -Xusejit:$use_jit"
 debuggee_args="$debuggee_args -Xusejit:$use_jit"
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 729a3e5..b860a62 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -127,7 +127,7 @@
 
 # JIT settings.
 if $use_jit; then
-  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken"
 fi
 vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"