Merge "Add back a deleted check related to verification."
diff --git a/CleanSpec.mk b/CleanSpec.mk
new file mode 100644
index 0000000..341df78
--- /dev/null
+++ b/CleanSpec.mk
@@ -0,0 +1,55 @@
+# Copyright (C) 2007 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If you don't need to do a full clean build but would like to touch
+# a file or delete some intermediate files, add a clean step to the end
+# of the list.  These steps will only be run once, if they haven't been
+# run before.
+#
+# E.g.:
+#     $(call add-clean-step, touch -c external/sqlite/sqlite3.h)
+#     $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates)
+#
+# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with
+# files that are missing or have been moved.
+#
+# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory.
+# Use $(OUT_DIR) to refer to the "out" directory.
+#
+# If you need to re-do something that's already mentioned, just copy
+# the command and add it to the bottom of the list.  E.g., if a change
+# that you made last week required touching a file and a change you
+# made today requires touching the same file, just copy the old
+# touch step and add it to the end of the list.
+#
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
+
+# For example:
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/APPS/AndroidTests_intermediates)
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/JAVA_LIBRARIES/core_intermediates)
+#$(call add-clean-step, find $(OUT_DIR) -type f -name "IGTalkSession*" -print0 | xargs -0 rm -f)
+#$(call add-clean-step, rm -rf $(PRODUCT_OUT)/data/*)
+
+# Switching to jemalloc requires deleting these files.
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libart_*)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libartd_*)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libart_*)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libartd_*)
+
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 3a19c40..ee51fcd 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -179,6 +179,42 @@
   ART_TEST_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
+include $(CLEAR_VARS)
+LOCAL_MODULE := libart-gtest
+LOCAL_MODULE_TAGS := optional
+LOCAL_CPP_EXTENSION := cc
+LOCAL_CFLAGS := $(ART_TARGET_CFLAGS)
+LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_SHARED_LIBRARIES := libcutils libartd libartd-compiler libdl
+LOCAL_STATIC_LIBRARIES += libgtest_libc++
+LOCAL_CLANG := $(ART_TARGET_CLANG)
+LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
+include external/libcxx/libcxx.mk
+include $(BUILD_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := libart-gtest
+LOCAL_MODULE_TAGS := optional
+LOCAL_CPP_EXTENSION := cc
+LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_SHARED_LIBRARIES := libartd libartd-compiler
+LOCAL_STATIC_LIBRARIES := libcutils
+ifneq ($(WITHOUT_HOST_CLANG),true)
+  # GCC-compiled host tests fail with this linked, presumably due to destructors that run.
+  LOCAL_STATIC_LIBRARIES += libgtest_libc++_host
+endif
+LOCAL_LDLIBS += -ldl -lpthread
+LOCAL_MULTILIB := both
+LOCAL_CLANG := $(ART_HOST_CLANG)
+LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
+include external/libcxx/libcxx.mk
+include $(BUILD_HOST_SHARED_LIBRARY)
+
 # Variables holding collections of gtest prerequisites used to run a number of gtests.
 ART_TEST_HOST_GTEST$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_GTEST$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
@@ -237,7 +273,7 @@
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_HOST_DEPS) $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX)) $$(gtest_deps)
-	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && $$(call ART_TEST_PASSED,$$@)) \
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) $$< && $$(call ART_TEST_PASSED,$$@)) \
 	  || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(gtest_rule)
@@ -285,12 +321,12 @@
     LOCAL_MODULE_TAGS := tests
   endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
-  LOCAL_SRC_FILES := $$(art_gtest_filename) runtime/common_runtime_test.cc
+  LOCAL_SRC_FILES := $$(art_gtest_filename)
   LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
-  LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries)
+  LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest
 
-  # LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
-  # LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
 
   # Mac OS linker doesn't understand --export-dynamic.
   ifneq ($$(HOST_OS)-$$(art_target_or_host),darwin-host)
@@ -304,7 +340,6 @@
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,debug))
     LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
-    LOCAL_STATIC_LIBRARIES += libgtest_libc++
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
@@ -328,10 +363,6 @@
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
     LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libz-host
     LOCAL_STATIC_LIBRARIES += libcutils libvixl
-    ifneq ($$(WITHOUT_HOST_CLANG),true)
-      # GCC host compiled tests fail with this linked, presumably due to destructors that run.
-      LOCAL_STATIC_LIBRARIES += libgtest_libc++_host
-    endif
     LOCAL_LDLIBS += -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 61a2cde..10936a4 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -48,11 +48,6 @@
 $(eval $(call create-core-oat-host-rules,2ND_))
 endif
 
-IMPLICIT_CHECKS_arm := null,stack
-IMPLICIT_CHECKS_arm64 := none
-IMPLICIT_CHECKS_x86 := none
-IMPLICIT_CHECKS_x86_64 := none
-IMPLICIT_CHECKS_mips := none
 define create-core-oat-target-rules
 $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "target dex2oat: $$@ ($$?)"
@@ -63,7 +58,6 @@
 	  --oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) \
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(1)TARGET_ARCH) \
 	  --instruction-set-features=$$($(1)TARGET_INSTRUCTION_SET_FEATURES) \
-	  --implicit-checks=$(IMPLICIT_CHECKS_$($(1)TARGET_ARCH)) \
 	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information
 
 # This "renaming" eases declaration in art/Android.mk
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b469946..02dad2a 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -72,6 +72,7 @@
 	dex/verification_results.cc \
 	dex/vreg_analysis.cc \
 	dex/ssa_transformation.cc \
+	dex/quick_compiler_callbacks.cc \
 	driver/compiler_driver.cc \
 	driver/dex_compilation_unit.cc \
 	jni/quick/arm/calling_convention_arm.cc \
@@ -95,6 +96,7 @@
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_type_propagation.cc \
+	optimizing/ssa_phi_elimination.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
new file mode 100644
index 0000000..051cfb6
--- /dev/null
+++ b/compiler/common_compiler_test.cc
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_compiler_test.h"
+
+#if defined(__arm__)
+#include <sys/ucontext.h>
+#endif
+#include <fstream>
+
+#include "class_linker.h"
+#include "compiled_method.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
+#include "entrypoints/entrypoint_utils.h"
+#include "interpreter/interpreter.h"
+#include "mirror/art_method.h"
+#include "mirror/dex_cache.h"
+#include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+
+// Normally the ClassLinker supplies this.
+extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
+
+#if defined(__arm__)
+// A signal handler called when we hit an illegal instruction.  It sets r0 to #0 to
+// signal the error and then increments the PC in the signal context past the
+// offending instruction.  We know the instruction is an sdiv (4 bytes long).
+static void baddivideinst(int signo, siginfo *si, void *data) {
+  UNUSED(signo);
+  UNUSED(si);
+  struct ucontext *uc = (struct ucontext *)data;
+  struct sigcontext *sc = &uc->uc_mcontext;
+  sc->arm_r0 = 0;     // set R0 to #0 to signal error
+  sc->arm_pc += 4;    // skip offending instruction
+}
+
+// This is in arch/arm/arm_sdiv.S.  It does the following:
+// mov r1,#1
+// sdiv r0,r1,r1
+// bx lr
+//
+// The result will be the value 1 if sdiv is supported.  If it is not supported,
+// a SIGILL signal will be raised and the signal handler (baddivideinst) called.
+// The signal handler sets r0 to #0 and then increments the pc beyond the failed instruction.
+// Thus, if the instruction is not supported, the result of this function will be #0.
+
+extern "C" bool CheckForARMSDIVInstruction();
+
+static InstructionSetFeatures GuessInstructionFeatures() {
+  InstructionSetFeatures f;
+
+  // Change 'false' to 'true' to enable processing of /proc/cpuinfo.
+  if (false) {
+    // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+    // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+    std::ifstream in("/proc/cpuinfo");
+    if (in) {
+      while (!in.eof()) {
+        std::string line;
+        std::getline(in, line);
+        if (!in.eof()) {
+          if (line.find("Features") != std::string::npos) {
+            if (line.find("idivt") != std::string::npos) {
+              f.SetHasDivideInstruction(true);
+            }
+          }
+        }
+      }
+      in.close();
+    } else {
+      LOG(INFO) << "Failed to open /proc/cpuinfo";
+    }
+  }
+
+  // See if we have an sdiv instruction.  Register a signal handler and try to execute
+  // an sdiv instruction.  If we get a SIGILL then it's not supported.  We can't use
+  // the /proc/cpuinfo method for this because Krait devices don't always put the idivt
+  // feature in the list.
+  struct sigaction sa, osa;
+  sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
+  sa.sa_sigaction = baddivideinst;
+  sigaction(SIGILL, &sa, &osa);
+
+  if (CheckForARMSDIVInstruction()) {
+    f.SetHasDivideInstruction(true);
+  }
+
+  // Restore the signal handler.
+  sigaction(SIGILL, &osa, nullptr);
+
+  // Other feature guesses in here.
+  return f;
+}
+#endif
+
+// Given a set of instruction features from the build, parse it.  The
+// input 'str' is a comma-separated list of feature names; parse it and
+// return the corresponding InstructionSetFeatures object.
+static InstructionSetFeatures ParseFeatureList(std::string str) {
+  InstructionSetFeatures result;
+  typedef std::vector<std::string> FeatureList;
+  FeatureList features;
+  Split(str, ',', features);
+  for (FeatureList::iterator i = features.begin(); i != features.end(); i++) {
+    std::string feature = Trim(*i);
+    if (feature == "default") {
+      // Nothing to do.
+    } else if (feature == "div") {
+      // Supports divide instruction.
+      result.SetHasDivideInstruction(true);
+    } else if (feature == "nodiv") {
+      // Turn off support for divide instruction.
+      result.SetHasDivideInstruction(false);
+    } else {
+      LOG(FATAL) << "Unknown instruction set feature: '" << feature << "'";
+    }
+  }
+  // Others...
+  return result;
+}
+
+CommonCompilerTest::CommonCompilerTest() {}
+CommonCompilerTest::~CommonCompilerTest() {}
+
+OatFile::OatMethod CommonCompilerTest::CreateOatMethod(const void* code, const uint8_t* gc_map) {
+  CHECK(code != nullptr);
+  const byte* base;
+  uint32_t code_offset, gc_map_offset;
+  if (gc_map == nullptr) {
+    base = reinterpret_cast<const byte*>(code);  // Base of data points at code.
+    base -= kPointerSize;  // Move backward so that code_offset != 0.
+    code_offset = kPointerSize;
+    gc_map_offset = 0;
+  } else {
+    // TODO: 64bit support.
+    base = nullptr;  // Base of data in oat file, i.e., 0.
+    code_offset = PointerToLowMemUInt32(code);
+    gc_map_offset = PointerToLowMemUInt32(gc_map);
+  }
+  return OatFile::OatMethod(base, code_offset, gc_map_offset);
+}
+
+void CommonCompilerTest::MakeExecutable(mirror::ArtMethod* method) {
+  CHECK(method != nullptr);
+
+  const CompiledMethod* compiled_method = nullptr;
+  if (!method->IsAbstract()) {
+    mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
+    const DexFile& dex_file = *dex_cache->GetDexFile();
+    compiled_method =
+        compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
+                                                            method->GetDexMethodIndex()));
+  }
+  if (compiled_method != nullptr) {
+    const std::vector<uint8_t>* code = compiled_method->GetQuickCode();
+    const void* code_ptr;
+    if (code != nullptr) {
+      uint32_t code_size = code->size();
+      CHECK_NE(0u, code_size);
+      const std::vector<uint8_t>& vmap_table = compiled_method->GetVmapTable();
+      uint32_t vmap_table_offset = vmap_table.empty() ? 0u
+          : sizeof(OatQuickMethodHeader) + vmap_table.size();
+      const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
+      uint32_t mapping_table_offset = mapping_table.empty() ? 0u
+          : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size();
+      OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
+                                         compiled_method->GetFrameSizeInBytes(),
+                                         compiled_method->GetCoreSpillMask(),
+                                         compiled_method->GetFpSpillMask(), code_size);
+
+      header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
+      std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
+      size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table.size();
+      size_t code_offset = compiled_method->AlignCode(size - code_size);
+      size_t padding = code_offset - (size - code_size);
+      chunk->reserve(padding + size);
+      chunk->resize(sizeof(method_header));
+      memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
+      chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
+      chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end());
+      chunk->insert(chunk->begin(), padding, 0);
+      chunk->insert(chunk->end(), code->begin(), code->end());
+      CHECK_EQ(padding + size, chunk->size());
+      code_ptr = &(*chunk)[code_offset];
+    } else {
+      code = compiled_method->GetPortableCode();
+      code_ptr = &(*code)[0];
+    }
+    MakeExecutable(code_ptr, code->size());
+    const void* method_code = CompiledMethod::CodePointer(code_ptr,
+                                                          compiled_method->GetInstructionSet());
+    LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
+    OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
+    oat_method.LinkMethod(method);
+    method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
+  } else {
+    // No code? You must mean to go into the interpreter.
+    // Or the generic JNI...
+    if (!method->IsNative()) {
+      const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge()
+          : GetQuickToInterpreterBridge();
+      OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
+      oat_method.LinkMethod(method);
+      method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
+    } else {
+      const void* method_code = reinterpret_cast<void*>(art_quick_generic_jni_trampoline);
+
+      OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
+      oat_method.LinkMethod(method);
+      method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
+    }
+  }
+  // Create bridges to transition between different kinds of compiled bridge.
+  if (method->GetEntryPointFromPortableCompiledCode() == nullptr) {
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
+  } else {
+    CHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
+    method->SetIsPortableCompiled();
+  }
+}
+
+void CommonCompilerTest::MakeExecutable(const void* code_start, size_t code_length) {
+  CHECK(code_start != nullptr);
+  CHECK_NE(code_length, 0U);
+  uintptr_t data = reinterpret_cast<uintptr_t>(code_start);
+  uintptr_t base = RoundDown(data, kPageSize);
+  uintptr_t limit = RoundUp(data + code_length, kPageSize);
+  uintptr_t len = limit - base;
+  int result = mprotect(reinterpret_cast<void*>(base), len, PROT_READ | PROT_WRITE | PROT_EXEC);
+  CHECK_EQ(result, 0);
+
+  // Flush instruction cache
+  // Only uses __builtin___clear_cache if GCC >= 4.3.3
+#if GCC_VERSION >= 40303
+  __builtin___clear_cache(reinterpret_cast<void*>(base), reinterpret_cast<void*>(base + len));
+#else
+  // Only warn if not Intel, as Intel's coherent instruction caches don't need an explicit flush.
+#if !defined(__i386__) && !defined(__x86_64__)
+  LOG(WARNING) << "UNIMPLEMENTED: cache flush";
+#endif
+#endif
+}
+
+void CommonCompilerTest::MakeExecutable(mirror::ClassLoader* class_loader, const char* class_name) {
+  std::string class_descriptor(DotToDescriptor(class_name));
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
+  mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
+  CHECK(klass != nullptr) << "Class not found " << class_name;
+  for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
+    MakeExecutable(klass->GetDirectMethod(i));
+  }
+  for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
+    MakeExecutable(klass->GetVirtualMethod(i));
+  }
+}
+
+void CommonCompilerTest::SetUp() {
+  CommonRuntimeTest::SetUp();
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    InstructionSet instruction_set = kRuntimeISA;
+
+    // Take the default set of instruction features from the build.
+    InstructionSetFeatures instruction_set_features =
+        ParseFeatureList(Runtime::GetDefaultInstructionSetFeatures());
+
+#if defined(__arm__)
+    InstructionSetFeatures runtime_features = GuessInstructionFeatures();
+
+    // For ARM, do a runtime check to make sure that the features we are passed from
+    // the build match the features we actually determine at runtime.
+    ASSERT_LE(instruction_set_features, runtime_features);
+#endif
+
+    runtime_->SetInstructionSet(instruction_set);
+    for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
+      Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
+      if (!runtime_->HasCalleeSaveMethod(type)) {
+        runtime_->SetCalleeSaveMethod(
+            runtime_->CreateCalleeSaveMethod(type), type);
+      }
+    }
+
+    // TODO: make selectable
+    Compiler::Kind compiler_kind =
+        (kUsePortableCompiler) ? Compiler::kPortable : Compiler::kQuick;
+    timer_.reset(new CumulativeLogger("Compilation times"));
+    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                              verification_results_.get(),
+                                              method_inliner_map_.get(),
+                                              compiler_kind, instruction_set,
+                                              instruction_set_features,
+                                              true, new CompilerDriver::DescriptorSet,
+                                              2, true, true, timer_.get()));
+  }
+  // We typically don't generate an image in unit tests; disable this optimization by default.
+  compiler_driver_->SetSupportBootImageFixup(false);
+}
+
+void CommonCompilerTest::SetUpRuntimeOptions(RuntimeOptions* options) {
+  CommonRuntimeTest::SetUpRuntimeOptions(options);
+
+  compiler_options_.reset(new CompilerOptions);
+  verification_results_.reset(new VerificationResults(compiler_options_.get()));
+  method_inliner_map_.reset(new DexFileToMethodInlinerMap);
+  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
+                                              method_inliner_map_.get()));
+  options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+}
+
+void CommonCompilerTest::TearDown() {
+  timer_.reset();
+  compiler_driver_.reset();
+  callbacks_.reset();
+  method_inliner_map_.reset();
+  verification_results_.reset();
+  compiler_options_.reset();
+
+  CommonRuntimeTest::TearDown();
+}
+
+void CommonCompilerTest::CompileClass(mirror::ClassLoader* class_loader, const char* class_name) {
+  std::string class_descriptor(DotToDescriptor(class_name));
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
+  mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
+  CHECK(klass != nullptr) << "Class not found " << class_name;
+  for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
+    CompileMethod(klass->GetDirectMethod(i));
+  }
+  for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
+    CompileMethod(klass->GetVirtualMethod(i));
+  }
+}
+
+void CommonCompilerTest::CompileMethod(mirror::ArtMethod* method) {
+  CHECK(method != nullptr);
+  TimingLogger timings("CommonTest::CompileMethod", false, false);
+  TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
+  compiler_driver_->CompileOne(method, &timings);
+  TimingLogger::ScopedTiming t2("MakeExecutable", &timings);
+  MakeExecutable(method);
+}
+
+void CommonCompilerTest::CompileDirectMethod(Handle<mirror::ClassLoader> class_loader,
+                                             const char* class_name, const char* method_name,
+                                             const char* signature) {
+  std::string class_descriptor(DotToDescriptor(class_name));
+  Thread* self = Thread::Current();
+  mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader);
+  CHECK(klass != nullptr) << "Class not found " << class_name;
+  mirror::ArtMethod* method = klass->FindDirectMethod(method_name, signature);
+  CHECK(method != nullptr) << "Direct method not found: "
+      << class_name << "." << method_name << signature;
+  CompileMethod(method);
+}
+
+void CommonCompilerTest::CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
+                                              const char* method_name, const char* signature)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  std::string class_descriptor(DotToDescriptor(class_name));
+  Thread* self = Thread::Current();
+  mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader);
+  CHECK(klass != nullptr) << "Class not found " << class_name;
+  mirror::ArtMethod* method = klass->FindVirtualMethod(method_name, signature);
+  CHECK(method != nullptr) << "Virtual method not found: "
+      << class_name << "." << method_name << signature;
+  CompileMethod(method);
+}
+
+void CommonCompilerTest::ReserveImageSpace() {
+  // Reserve where the image will be loaded up front so that other parts of test setup don't
+  // accidentally end up colliding with the fixed memory address when we need to load the image.
+  std::string error_msg;
+  image_reservation_.reset(MemMap::MapAnonymous("image reservation",
+                                                reinterpret_cast<byte*>(ART_BASE_ADDRESS),
+                                                (size_t)100 * 1024 * 1024,  // 100MB
+                                                PROT_NONE,
+                                                false /* no need for 4gb flag with fixed mmap */,
+                                                &error_msg));
+  CHECK(image_reservation_.get() != nullptr) << error_msg;
+}
+
+void CommonCompilerTest::UnreserveImageSpace() {
+  image_reservation_.reset();
+}
+
+}  // namespace art
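The base/offset handling in CreateOatMethod above is easy to misread: when there is no GC map, base is rewound by kPointerSize solely so that code_offset comes out non-zero. A minimal sketch of the invariant the OatFile::OatMethod consumer is presumably relying on (illustrative only, not the actual OatFile code):

    // Both branches of CreateOatMethod satisfy base + code_offset == code,
    // with code_offset != 0, so a method that has code is never mistaken
    // for one that doesn't.  (The gc_map branch is 32-bit only; see the TODO.)
    const byte* ReconstructCode(const byte* base, uint32_t code_offset) {
      return base + code_offset;  // == the original 'code' pointer
    }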
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index e11f61a..df06b71 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -17,409 +17,68 @@
 #ifndef ART_COMPILER_COMMON_COMPILER_TEST_H_
 #define ART_COMPILER_COMMON_COMPILER_TEST_H_
 
-#include "compiler.h"
-#include "compiler_callbacks.h"
+#include <list>
+#include <vector>
+
 #include "common_runtime_test.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verification_results.h"
-#include "driver/compiler_callbacks_impl.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
+#include "oat_file.h"
 
 namespace art {
+namespace mirror {
+  class ClassLoader;
+}  // namespace mirror
 
-#if defined(__arm__)
+class CompilerDriver;
+class CompilerOptions;
+class CumulativeLogger;
+class DexFileToMethodInlinerMap;
+class VerificationResults;
 
-#include <sys/ucontext.h>
-
-// A signal handler called when have an illegal instruction.  We record the fact in
-// a global boolean and then increment the PC in the signal context to return to
-// the next instruction.  We know the instruction is an sdiv (4 bytes long).
-static inline void baddivideinst(int signo, siginfo *si, void *data) {
-  UNUSED(signo);
-  UNUSED(si);
-  struct ucontext *uc = (struct ucontext *)data;
-  struct sigcontext *sc = &uc->uc_mcontext;
-  sc->arm_r0 = 0;     // set R0 to #0 to signal error
-  sc->arm_pc += 4;    // skip offending instruction
-}
-
-// This is in arch/arm/arm_sdiv.S.  It does the following:
-// mov r1,#1
-// sdiv r0,r1,r1
-// bx lr
-//
-// the result will be the value 1 if sdiv is supported.  If it is not supported
-// a SIGILL signal will be raised and the signal handler (baddivideinst) called.
-// The signal handler sets r0 to #0 and then increments pc beyond the failed instruction.
-// Thus if the instruction is not supported, the result of this function will be #0
-
-extern "C" bool CheckForARMSDIVInstruction();
-
-static inline InstructionSetFeatures GuessInstructionFeatures() {
-  InstructionSetFeatures f;
-
-  // Uncomment this for processing of /proc/cpuinfo.
-  if (false) {
-    // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
-    // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
-    std::ifstream in("/proc/cpuinfo");
-    if (in) {
-      while (!in.eof()) {
-        std::string line;
-        std::getline(in, line);
-        if (!in.eof()) {
-          if (line.find("Features") != std::string::npos) {
-            if (line.find("idivt") != std::string::npos) {
-              f.SetHasDivideInstruction(true);
-            }
-          }
-        }
-        in.close();
-      }
-    } else {
-      LOG(INFO) << "Failed to open /proc/cpuinfo";
-    }
-  }
-
-  // See if have a sdiv instruction.  Register a signal handler and try to execute
-  // an sdiv instruction.  If we get a SIGILL then it's not supported.  We can't use
-  // the /proc/cpuinfo method for this because Krait devices don't always put the idivt
-  // feature in the list.
-  struct sigaction sa, osa;
-  sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
-  sa.sa_sigaction = baddivideinst;
-  sigaction(SIGILL, &sa, &osa);
-
-  if (CheckForARMSDIVInstruction()) {
-    f.SetHasDivideInstruction(true);
-  }
-
-  // Restore the signal handler.
-  sigaction(SIGILL, &osa, nullptr);
-
-  // Other feature guesses in here.
-  return f;
-}
-
-#endif
-
-// Given a set of instruction features from the build, parse it.  The
-// input 'str' is a comma separated list of feature names.  Parse it and
-// return the InstructionSetFeatures object.
-static inline InstructionSetFeatures ParseFeatureList(std::string str) {
-  InstructionSetFeatures result;
-  typedef std::vector<std::string> FeatureList;
-  FeatureList features;
-  Split(str, ',', features);
-  for (FeatureList::iterator i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
-    if (feature == "default") {
-      // Nothing to do.
-    } else if (feature == "div") {
-      // Supports divide instruction.
-      result.SetHasDivideInstruction(true);
-    } else if (feature == "nodiv") {
-      // Turn off support for divide instruction.
-      result.SetHasDivideInstruction(false);
-    } else {
-      LOG(FATAL) << "Unknown instruction set feature: '" << feature << "'";
-    }
-  }
-  // Others...
-  return result;
-}
-
-// Normally the ClassLinker supplies this.
-extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
+template<class T> class Handle;
 
 class CommonCompilerTest : public CommonRuntimeTest {
  public:
+  CommonCompilerTest();
+  ~CommonCompilerTest();
+
   // Create an OatMethod based on pointers (for unit tests).
-  OatFile::OatMethod CreateOatMethod(const void* code,
-                                     const uint8_t* gc_map) {
-    CHECK(code != nullptr);
-    const byte* base;
-    uint32_t code_offset, gc_map_offset;
-    if (gc_map == nullptr) {
-      base = reinterpret_cast<const byte*>(code);  // Base of data points at code.
-      base -= kPointerSize;  // Move backward so that code_offset != 0.
-      code_offset = kPointerSize;
-      gc_map_offset = 0;
-    } else {
-      // TODO: 64bit support.
-      base = nullptr;  // Base of data in oat file, ie 0.
-      code_offset = PointerToLowMemUInt32(code);
-      gc_map_offset = PointerToLowMemUInt32(gc_map);
-    }
-    return OatFile::OatMethod(base,
-                              code_offset,
-                              gc_map_offset);
-  }
+  OatFile::OatMethod CreateOatMethod(const void* code, const uint8_t* gc_map);
 
-  void MakeExecutable(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(method != nullptr);
+  void MakeExecutable(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-    const CompiledMethod* compiled_method = nullptr;
-    if (!method->IsAbstract()) {
-      mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
-      const DexFile& dex_file = *dex_cache->GetDexFile();
-      compiled_method =
-          compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
-                                                              method->GetDexMethodIndex()));
-    }
-    if (compiled_method != nullptr) {
-      const std::vector<uint8_t>* code = compiled_method->GetQuickCode();
-      const void* code_ptr;
-      if (code != nullptr) {
-        uint32_t code_size = code->size();
-        CHECK_NE(0u, code_size);
-        const std::vector<uint8_t>& vmap_table = compiled_method->GetVmapTable();
-        uint32_t vmap_table_offset = vmap_table.empty() ? 0u
-            : sizeof(OatQuickMethodHeader) + vmap_table.size();
-        const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
-        uint32_t mapping_table_offset = mapping_table.empty() ? 0u
-            : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size();
-        OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
-                                           compiled_method->GetFrameSizeInBytes(),
-                                           compiled_method->GetCoreSpillMask(),
-                                           compiled_method->GetFpSpillMask(), code_size);
-
-        header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
-        std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-        size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table.size();
-        size_t code_offset = compiled_method->AlignCode(size - code_size);
-        size_t padding = code_offset - (size - code_size);
-        chunk->reserve(padding + size);
-        chunk->resize(sizeof(method_header));
-        memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
-        chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
-        chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end());
-        chunk->insert(chunk->begin(), padding, 0);
-        chunk->insert(chunk->end(), code->begin(), code->end());
-        CHECK_EQ(padding + size, chunk->size());
-        code_ptr = &(*chunk)[code_offset];
-      } else {
-        code = compiled_method->GetPortableCode();
-        code_ptr = &(*code)[0];
-      }
-      MakeExecutable(code_ptr, code->size());
-      const void* method_code = CompiledMethod::CodePointer(code_ptr,
-                                                            compiled_method->GetInstructionSet());
-      LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
-      OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
-      oat_method.LinkMethod(method);
-      method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
-    } else {
-      // No code? You must mean to go into the interpreter.
-      // Or the generic JNI...
-      if (!method->IsNative()) {
-        const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge()
-                                                       : GetQuickToInterpreterBridge();
-        OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
-        oat_method.LinkMethod(method);
-        method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
-      } else {
-        const void* method_code = reinterpret_cast<void*>(art_quick_generic_jni_trampoline);
-
-        OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
-        oat_method.LinkMethod(method);
-        method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
-      }
-    }
-    // Create bridges to transition between different kinds of compiled bridge.
-    if (method->GetEntryPointFromPortableCompiledCode() == nullptr) {
-      method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
-    } else {
-      CHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
-      method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
-      method->SetIsPortableCompiled();
-    }
-  }
-
-  static void MakeExecutable(const void* code_start, size_t code_length) {
-    CHECK(code_start != nullptr);
-    CHECK_NE(code_length, 0U);
-    uintptr_t data = reinterpret_cast<uintptr_t>(code_start);
-    uintptr_t base = RoundDown(data, kPageSize);
-    uintptr_t limit = RoundUp(data + code_length, kPageSize);
-    uintptr_t len = limit - base;
-    int result = mprotect(reinterpret_cast<void*>(base), len, PROT_READ | PROT_WRITE | PROT_EXEC);
-    CHECK_EQ(result, 0);
-
-    // Flush instruction cache
-    // Only uses __builtin___clear_cache if GCC >= 4.3.3
-#if GCC_VERSION >= 40303
-    __builtin___clear_cache(reinterpret_cast<void*>(base), reinterpret_cast<void*>(base + len));
-#else
-    LOG(WARNING) << "UNIMPLEMENTED: cache flush";
-#endif
-  }
+  static void MakeExecutable(const void* code_start, size_t code_length);
 
   void MakeExecutable(mirror::ClassLoader* class_loader, const char* class_name)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string class_descriptor(DotToDescriptor(class_name));
-    Thread* self = Thread::Current();
-    StackHandleScope<1> hs(self);
-    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
-    mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
-    CHECK(klass != nullptr) << "Class not found " << class_name;
-    for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
-      MakeExecutable(klass->GetDirectMethod(i));
-    }
-    for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
-      MakeExecutable(klass->GetVirtualMethod(i));
-    }
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
-  virtual void SetUp() {
-    CommonRuntimeTest::SetUp();
-    {
-      ScopedObjectAccess soa(Thread::Current());
+  virtual void SetUp();
 
-      InstructionSet instruction_set = kNone;
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options);
 
-      // Take the default set of instruction features from the build.
-      InstructionSetFeatures instruction_set_features =
-          ParseFeatureList(Runtime::GetDefaultInstructionSetFeatures());
-
-#if defined(__arm__)
-      instruction_set = kThumb2;
-      InstructionSetFeatures runtime_features = GuessInstructionFeatures();
-
-      // for ARM, do a runtime check to make sure that the features we are passed from
-      // the build match the features we actually determine at runtime.
-      ASSERT_LE(instruction_set_features, runtime_features);
-#elif defined(__aarch64__)
-      instruction_set = kArm64;
-#elif defined(__mips__)
-      instruction_set = kMips;
-#elif defined(__i386__)
-      instruction_set = kX86;
-#elif defined(__x86_64__)
-      instruction_set = kX86_64;
-#endif
-
-      runtime_->SetInstructionSet(instruction_set);
-      for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-        Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
-        if (!runtime_->HasCalleeSaveMethod(type)) {
-          runtime_->SetCalleeSaveMethod(
-              runtime_->CreateCalleeSaveMethod(type), type);
-        }
-      }
-
-      // TODO: make selectable
-      Compiler::Kind compiler_kind
-          = (kUsePortableCompiler) ? Compiler::kPortable : Compiler::kQuick;
-      timer_.reset(new CumulativeLogger("Compilation times"));
-      compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                                verification_results_.get(),
-                                                method_inliner_map_.get(),
-                                                compiler_kind, instruction_set,
-                                                instruction_set_features,
-                                                true, new CompilerDriver::DescriptorSet,
-                                                2, true, true, timer_.get()));
-    }
-    // We typically don't generate an image in unit tests, disable this optimization by default.
-    compiler_driver_->SetSupportBootImageFixup(false);
-  }
-
-  virtual void SetUpRuntimeOptions(Runtime::Options *options) {
-    CommonRuntimeTest::SetUpRuntimeOptions(options);
-
-    compiler_options_.reset(new CompilerOptions);
-    verification_results_.reset(new VerificationResults(compiler_options_.get()));
-    method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-    callbacks_.reset(new CompilerCallbacksImpl(verification_results_.get(),
-                                               method_inliner_map_.get()));
-    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-  }
-
-  virtual void TearDown() {
-    timer_.reset();
-    compiler_driver_.reset();
-    callbacks_.reset();
-    method_inliner_map_.reset();
-    verification_results_.reset();
-    compiler_options_.reset();
-
-    CommonRuntimeTest::TearDown();
-  }
+  virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string class_descriptor(DotToDescriptor(class_name));
-    Thread* self = Thread::Current();
-    StackHandleScope<1> hs(self);
-    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
-    mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
-    CHECK(klass != nullptr) << "Class not found " << class_name;
-    for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
-      CompileMethod(klass->GetDirectMethod(i));
-    }
-    for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
-      CompileMethod(klass->GetVirtualMethod(i));
-    }
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(method != nullptr);
-    TimingLogger timings("CommonTest::CompileMethod", false, false);
-    TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
-    compiler_driver_->CompileOne(method, &timings);
-    TimingLogger::ScopedTiming t2("MakeExecutable", &timings);
-    MakeExecutable(method);
-  }
+  void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                            const char* method_name, const char* signature)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string class_descriptor(DotToDescriptor(class_name));
-    Thread* self = Thread::Current();
-    mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader);
-    CHECK(klass != nullptr) << "Class not found " << class_name;
-    mirror::ArtMethod* method = klass->FindDirectMethod(method_name, signature);
-    CHECK(method != nullptr) << "Direct method not found: "
-                             << class_name << "." << method_name << signature;
-    CompileMethod(method);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                             const char* method_name, const char* signature)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string class_descriptor(DotToDescriptor(class_name));
-    Thread* self = Thread::Current();
-    mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader);
-    CHECK(klass != nullptr) << "Class not found " << class_name;
-    mirror::ArtMethod* method = klass->FindVirtualMethod(method_name, signature);
-    CHECK(method != NULL) << "Virtual method not found: "
-                          << class_name << "." << method_name << signature;
-    CompileMethod(method);
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void ReserveImageSpace() {
-    // Reserve where the image will be loaded up front so that other parts of test set up don't
-    // accidentally end up colliding with the fixed memory address when we need to load the image.
-    std::string error_msg;
-    image_reservation_.reset(MemMap::MapAnonymous("image reservation",
-                                                  reinterpret_cast<byte*>(ART_BASE_ADDRESS),
-                                                  (size_t)100 * 1024 * 1024,  // 100MB
-                                                  PROT_NONE,
-                                                  false /* no need for 4gb flag with fixed mmap*/,
-                                                  &error_msg));
-    CHECK(image_reservation_.get() != nullptr) << error_msg;
-  }
+  void ReserveImageSpace();
 
-  void UnreserveImageSpace() {
-    image_reservation_.reset();
-  }
+  void UnreserveImageSpace();
 
   std::unique_ptr<CompilerOptions> compiler_options_;
   std::unique_ptr<VerificationResults> verification_results_;
   std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
-  std::unique_ptr<CompilerCallbacksImpl> callbacks_;
+  std::unique_ptr<CompilerCallbacks> callbacks_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<CumulativeLogger> timer_;
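The out-of-line constructor and destructor declared earlier in this header are what make the slimmed-down includes legal: the class now holds std::unique_ptr members of forward-declared types, which compiles only if the destructor is instantiated where those types are complete. A minimal sketch of the idiom, with hypothetical names:

    // widget.h: a forward declaration suffices for the member...
    #include <memory>
    class Impl;
    class Widget {
     public:
      Widget();
      ~Widget();  // ...provided the destructor is defined out of line.
     private:
      std::unique_ptr<Impl> impl_;
    };

    // widget.cc: Impl is complete here, so ~unique_ptr<Impl> can instantiate.
    class Impl {};
    Widget::Widget() : impl_(new Impl) {}
    Widget::~Widget() = default;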
 
diff --git a/runtime/compiled_class.h b/compiler/compiled_class.h
similarity index 88%
rename from runtime/compiled_class.h
rename to compiler/compiled_class.h
index c53d500..b88d613 100644
--- a/runtime/compiled_class.h
+++ b/compiler/compiled_class.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_COMPILED_CLASS_H_
-#define ART_RUNTIME_COMPILED_CLASS_H_
+#ifndef ART_COMPILER_COMPILED_CLASS_H_
+#define ART_COMPILER_COMPILED_CLASS_H_
 
 #include "mirror/class.h"
 
@@ -34,4 +34,4 @@
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_COMPILED_CLASS_H_
+#endif  // ART_COMPILER_COMPILED_CLASS_H_
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index f940b54..bac1f12 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -15,6 +15,7 @@
  */
 
 #include "compilers.h"
+
 #include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
 #include "elf_writer_quick.h"
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index eb897f0..d1d5ad9 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -71,26 +71,28 @@
 };
 
 /**
- * @class CallInlining
- * @brief Perform method inlining pass.
+ * @class SpecialMethodInliner
+ * @brief Performs the method inlining pass on special kinds of methods.
+ * @details Special methods are methods that fall in one of the following categories:
+ * empty, instance getter, instance setter, argument return, and constant return.
  */
-class CallInlining : public PassME {
+class SpecialMethodInliner : public PassME {
  public:
-  CallInlining() : PassME("CallInlining") {
+  SpecialMethodInliner() : PassME("SpecialMethodInliner") {
   }
 
   bool Gate(const PassDataHolder* data) const {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    return cUnit->mir_graph->InlineCallsGate();
+    return cUnit->mir_graph->InlineSpecialMethodsGate();
   }
 
   void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    cUnit->mir_graph->InlineCallsStart();
+    cUnit->mir_graph->InlineSpecialMethodsStart();
   }
 
   bool Worker(const PassDataHolder* data) const {
@@ -100,7 +102,7 @@
     DCHECK(cUnit != nullptr);
     BasicBlock* bb = pass_me_data_holder->bb;
     DCHECK(bb != nullptr);
-    cUnit->mir_graph->InlineCalls(bb);
+    cUnit->mir_graph->InlineSpecialMethods(bb);
     // No need to repeat, so just return false.
     return false;
   }
@@ -109,7 +111,7 @@
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
-    cUnit->mir_graph->InlineCallsEnd();
+    cUnit->mir_graph->InlineSpecialMethodsEnd();
   }
 };
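For orientation, a sketch of how a PassME driver is assumed to exercise the Gate/Start/Worker/End hooks implemented above; the driver signature and block iteration are assumptions, not part of this change:

    #include <vector>

    void RunPassME(PassME* pass, PassMEDataHolder* data,
                   const std::vector<BasicBlock*>& blocks) {
      if (!pass->Gate(data)) {
        return;  // Gate() decides whether the pass runs at all.
      }
      pass->Start(data);
      for (BasicBlock* bb : blocks) {
        data->bb = bb;
        // Worker() returning true requests another pass over the same block;
        // SpecialMethodInliner always returns false (single visit).
        while (pass->Worker(data)) {
        }
      }
      pass->End(data);
    }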
 
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 799a742..69adb35 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -440,19 +440,23 @@
 
 /**
  * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details Without context sensitive analysis, the most conservative set of barriers
- * must be issued to ensure the Java Memory Model. Thus the recipe is as follows:
- * -# Use StoreStore barrier before volatile store.
- * -# Use StoreLoad barrier after volatile store.
- * -# Use LoadLoad and LoadStore barrier after each volatile load.
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
+ * primitives.  Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
  * -# Use StoreStore barrier after all stores but before return from any constructor whose
- * class has final fields.
+ *    class has final fields.
  */
 enum MemBarrierKind {
-  kLoadStore,
-  kLoadLoad,
+  kAnyStore,
+  kLoadAny,
   kStoreStore,
-  kStoreLoad
+  kAnyAny
 };
 
 std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
@@ -544,6 +548,14 @@
 
 std::ostream& operator<<(std::ostream& os, const VolatileKind& kind);
 
+enum WideKind {
+  kNotWide,      // Non-wide view
+  kWide,         // Wide view
+  kRef           // Ref width
+};
+
+std::ostream& operator<<(std::ostream& os, const WideKind& kind);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
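The acquire/release correspondence mentioned in the MemBarrierKind comment can be made concrete with standard C++11 fences. The mapping below illustrates the documented recipe under that assumption; it is not ART's code generator:

    #include <atomic>

    std::atomic<int> flag{0};
    int payload = 0;

    void VolatileStyleStore(int v) {
      payload = v;
      std::atomic_thread_fence(std::memory_order_release);  // ~ kAnyStore
      flag.store(1, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ kAnyAny
    }

    int VolatileStyleLoad() {
      while (flag.load(std::memory_order_relaxed) == 0) {
      }
      std::atomic_thread_fence(std::memory_order_acquire);  // ~ kLoadAny
      return payload;
    }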
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index f3ef796..d097500 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -96,8 +96,6 @@
     ~0U,
     // 2 = kArm64.     TODO(Arm64): enable optimizations once backend is mature enough.
     (1 << kLoadStoreElimination) |
-    (1 << kLoadHoisting) |
-    (1 << kBBOpt) |
     0,
     // 3 = kThumb2.
     0,
@@ -575,7 +573,7 @@
       // Check if we support the byte code.
       if (std::find(unsupport_list, unsupport_list + unsupport_list_size,
                     opcode) != unsupport_list + unsupport_list_size) {
-        if (!cu.mir_graph->IsPseudoMirOp(opcode)) {
+        if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
           VLOG(compiler) << "Unsupported dalvik byte code : "
               << mir->dalvikInsn.opcode;
         } else {
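This hunk and the ones that follow in mir_analysis.cc, mir_dataflow.cc, and mir_graph.cc apply one mechanical change: IsPseudoMirOp becomes a static predicate on MIR::DecodedInstruction. A sketch of the assumed shape (the exact definition lives in mir_graph.h; the body here is inferred from how callers index extended_mir_op_names_ with opcode - kMirOpFirst):

    struct DecodedInstruction {
      Instruction::Code opcode;
      // Extended (pseudo) MIR opcodes are numbered from kMirOpFirst upward,
      // past the last real Dalvik opcode.
      static bool IsPseudoMirOp(Instruction::Code op) {
        return static_cast<int>(op) >= static_cast<int>(kMirOpFirst);
      }
    };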
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index e372206..3de4483 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -902,7 +902,7 @@
   while (!done) {
     tbb->visited = true;
     for (MIR* mir = tbb->first_mir_insn; mir != NULL; mir = mir->next) {
-      if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+      if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
         // Skip any MIR pseudo-op.
         continue;
       }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index bc99a27..b82c5c7 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -909,6 +909,16 @@
   def_v->SetBit(dalvik_reg_id);
 }
 
+void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
+                              ArenaBitVector* live_in_v,
+                              const MIR::DecodedInstruction& d_insn) {
+  switch (static_cast<int>(d_insn.opcode)) {
+    default:
+      LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode;
+      break;
+  }
+}
+
 /*
  * Find out live-in variables for natural loops. Variables that are live-in in
  * the main loop body are considered to be defined in the entry block.
@@ -966,6 +976,9 @@
         HandleDef(def_v, d_insn->vA+1);
       }
     }
+    if (df_attributes & DF_FORMAT_EXTENDED) {
+      HandleExtended(use_v, def_v, live_in_v, mir->dalvikInsn);
+    }
   }
   return true;
 }
@@ -1048,6 +1061,14 @@
   }
 }
 
+void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) {
+  switch (static_cast<int>(mir->dalvikInsn.opcode)) {
+    default:
+      LOG(ERROR) << "Missing case for extended MIR: " << mir->dalvikInsn.opcode;
+      break;
+  }
+}
+
 /* Entry function to convert a block into SSA representation */
 bool MIRGraph::DoSSAConversion(BasicBlock* bb) {
   MIR* mir;
@@ -1063,7 +1084,7 @@
     uint64_t df_attributes = GetDataFlowAttributes(mir);
 
       // If not a pseudo-op, note non-leaf or can throw
-    if (!IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+    if (!MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
       int flags = Instruction::FlagsOf(mir->dalvikInsn.opcode);
 
       if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) {
@@ -1083,6 +1104,11 @@
       continue;
     }
 
+    if (df_attributes & DF_FORMAT_EXTENDED) {
+      DataFlowSSAFormatExtended(mir);
+      continue;
+    }
+
     if (df_attributes & DF_HAS_USES) {
       if (df_attributes & DF_UA) {
         num_uses++;
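Both extended-format handlers added above are deliberately empty switches: no extended opcode defines SSA uses or defs yet, so reaching them logs an error. When one does, a case along these lines is presumably what gets added; the opcode name and helper calls are hypothetical placeholders modeled on the surrounding file:

    void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
                                  ArenaBitVector* live_in_v,
                                  const MIR::DecodedInstruction& d_insn) {
      switch (static_cast<int>(d_insn.opcode)) {
        case kMirOpExample:  // Hypothetical extended opcode with one use, one def.
          HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB);
          HandleDef(def_v, d_insn.vA);
          break;
        default:
          LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode;
          break;
      }
    }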
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index 98866d9..68247b7 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -62,9 +62,9 @@
     compiler_driver->GetResolvedFieldDexFileLocation(resolved_field,
         &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_field_idx_);
     bool is_volatile = compiler_driver->IsFieldVolatile(resolved_field);
-
+    it->field_offset_ = resolved_field->GetOffset();
     std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
-        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx, &it->field_offset_);
+        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx);
     it->flags_ = 0u |  // Without kFlagIsStatic.
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 4fbace2..1c8a9b5 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -193,14 +193,16 @@
     bottom_block->successor_block_list_type = orig_block->successor_block_list_type;
     bottom_block->successor_blocks = orig_block->successor_blocks;
     orig_block->successor_block_list_type = kNotUsed;
-    orig_block->successor_blocks = NULL;
+    orig_block->successor_blocks = nullptr;
     GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks);
     while (true) {
       SuccessorBlockInfo* successor_block_info = iterator.Next();
-      if (successor_block_info == NULL) break;
+      if (successor_block_info == nullptr) break;
       BasicBlock* bb = GetBasicBlock(successor_block_info->block);
-      bb->predecessors->Delete(orig_block->id);
-      bb->predecessors->Insert(bottom_block->id);
+      if (bb != nullptr) {
+        bb->predecessors->Delete(orig_block->id);
+        bb->predecessors->Insert(bottom_block->id);
+      }
     }
   }
 
@@ -222,7 +224,7 @@
   DCHECK(insn == bottom_block->first_mir_insn);
   DCHECK_EQ(insn->offset, bottom_block->start_offset);
   DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
-         !IsPseudoMirOp(insn->dalvikInsn.opcode));
+         !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode));
   DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id);
   MIR* p = insn;
   dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
@@ -237,7 +239,7 @@
      * CHECK and work portions. Since the 2nd half of a split operation is always
      * the first in a BasicBlock, we can't hit it here.
      */
-    if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) {
+    if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id);
       dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
     }
@@ -861,11 +863,17 @@
 /* Dump the CFG into a DOT graph */
 void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) {
   FILE* file;
+  static AtomicInteger cnt(0);
+
+  // Increment counter to get a unique file number.
+  cnt++;
+
   std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file));
   ReplaceSpecialChars(fname);
-  fname = StringPrintf("%s%s%x%s.dot", dir_prefix, fname.c_str(),
+  fname = StringPrintf("%s%s%x%s_%d.dot", dir_prefix, fname.c_str(),
                       GetBasicBlock(GetEntryBlock()->fall_through)->start_offset,
-                      suffix == nullptr ? "" : suffix);
+                      suffix == nullptr ? "" : suffix,
+                      cnt.LoadRelaxed());
   file = fopen(fname.c_str(), "w");
   if (file == NULL) {
     return;
@@ -882,6 +890,7 @@
     BasicBlock* bb = GetBasicBlock(block_idx);
     if (bb == NULL) continue;
     if (bb->block_type == kDead) continue;
+    if (bb->hidden) continue;
     if (bb->block_type == kEntryBlock) {
       fprintf(file, "  entry_%d [shape=Mdiamond];\n", bb->id);
     } else if (bb->block_type == kExitBlock) {
@@ -916,7 +925,8 @@
             } else {
               fprintf(file, "    {%04x %s %s %s %s\\l}%s\\\n", mir->offset,
                       mir->ssa_rep ? GetDalvikDisassembly(mir) :
-                      !IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) :
+                      !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
+                        Instruction::Name(mir->dalvikInsn.opcode) :
                         extended_mir_op_names_[opcode - kMirOpFirst],
                       (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
                       (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
@@ -1222,7 +1232,7 @@
     nop = true;
   }
 
-  if (IsPseudoMirOp(opcode)) {
+  if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
   } else {
     dalvik_format = Instruction::FormatOf(insn.opcode);
@@ -1693,11 +1703,13 @@
   // We visited both taken and fallthrough. Now check if we have successors we need to visit.
   if (have_successors_ == true) {
     // Get information about next successor block.
-    SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
-
-    // If we don't have anymore successors, return nullptr.
-    if (successor_block_info != nullptr) {
-      return mir_graph_->GetBasicBlock(successor_block_info->block);
+    for (SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
+      successor_block_info != nullptr;
+      successor_block_info = successor_iter_.Next()) {
+      // If the successor was replaced by the null block id, take the next one.
+      if (successor_block_info->block != NullBasicBlockId) {
+        return mir_graph_->GetBasicBlock(successor_block_info->block);
+      }
     }
   }
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index d097328..1556a19 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -80,6 +80,7 @@
   kSetsConst,
   kFormat35c,
   kFormat3rc,
+  kFormatExtended,       // Extended format for extended MIRs.
   kNullCheckSrc0,        // Null check of uses[0].
   kNullCheckSrc1,        // Null check of uses[1].
   kNullCheckSrc2,        // Null check of uses[2].
@@ -118,6 +119,7 @@
 #define DF_SETS_CONST           (UINT64_C(1) << kSetsConst)
 #define DF_FORMAT_35C           (UINT64_C(1) << kFormat35c)
 #define DF_FORMAT_3RC           (UINT64_C(1) << kFormat3rc)
+#define DF_FORMAT_EXTENDED      (UINT64_C(1) << kFormatExtended)
 #define DF_NULL_CHK_0           (UINT64_C(1) << kNullCheckSrc0)
 #define DF_NULL_CHK_1           (UINT64_C(1) << kNullCheckSrc1)
 #define DF_NULL_CHK_2           (UINT64_C(1) << kNullCheckSrc2)
@@ -284,34 +286,46 @@
      */
     bool GetConstant(int64_t* ptr_value, bool* wide) const;
 
+    static bool IsPseudoMirOp(Instruction::Code opcode) {
+      return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
+    }
+
+    static bool IsPseudoMirOp(int opcode) {
+      return opcode >= static_cast<int>(kMirOpFirst);
+    }
+
+    bool IsInvoke() const {
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kInvoke) == Instruction::kInvoke);
+    }
+
     bool IsStore() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore);
     }
 
     bool IsLoad() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad);
     }
 
     bool IsConditionalBranch() const {
-      return (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch));
+      return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch));
     }
 
     /**
      * @brief Is the register C component of the decoded instruction a constant?
      */
     bool IsCFieldOrConstant() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
     }
 
     /**
      * @brief Is the register C component of the decoded instruction a constant?
      */
     bool IsBFieldOrConstant() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
     }
 
     bool IsCast() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast);
     }
 
     /**
@@ -321,11 +335,11 @@
      *            when crossing such an instruction.
      */
     bool Clobbers() const {
-      return ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber);
+      return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber);
     }
 
     bool IsLinear() const {
-      return (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0;
+      return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0;
     }
   } dalvikInsn;
 
@@ -877,14 +891,6 @@
     return backward_branches_ + forward_branches_;
   }
 
-  static bool IsPseudoMirOp(Instruction::Code opcode) {
-    return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
-  }
-
-  static bool IsPseudoMirOp(int opcode) {
-    return opcode >= static_cast<int>(kMirOpFirst);
-  }
-
   // Is this vreg in the in set?
   bool IsInVReg(int vreg) {
     return (vreg >= cu_->num_regs);
@@ -956,10 +962,10 @@
   void ComputeTopologicalSortOrder();
   BasicBlock* CreateNewBB(BBType block_type);
 
-  bool InlineCallsGate();
-  void InlineCallsStart();
-  void InlineCalls(BasicBlock* bb);
-  void InlineCallsEnd();
+  bool InlineSpecialMethodsGate();
+  void InlineSpecialMethodsStart();
+  void InlineSpecialMethods(BasicBlock* bb);
+  void InlineSpecialMethodsEnd();
 
   /**
    * @brief Perform the initial preparation for the Method Uses.
@@ -1059,6 +1065,9 @@
   void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v,
                        ArenaBitVector* live_in_v, int dalvik_reg_id);
   void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id);
+  void HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v,
+                      ArenaBitVector* live_in_v,
+                      const MIR::DecodedInstruction& d_insn);
   bool DoSSAConversion(BasicBlock* bb);
   bool InvokeUsesMethodStar(MIR* mir);
   int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction);
@@ -1080,6 +1089,7 @@
   void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
   void DataFlowSSAFormat35C(MIR* mir);
   void DataFlowSSAFormat3RC(MIR* mir);
+  void DataFlowSSAFormatExtended(MIR* mir);
   bool FindLocalLiveIn(BasicBlock* bb);
   bool VerifyPredInfo(BasicBlock* bb);
   BasicBlock* NeedsVisit(BasicBlock* bb);
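
The helpers above are now all guarded by !IsPseudoMirOp(opcode) because Instruction::FlagsOf() is only meaningful for real Dalvik opcodes; extended MIR opcodes start at kMirOpFirst, past the end of the flags table. A minimal standalone sketch of the pattern (the enum values and the FlagsOf() stub below are simplified stand-ins, not the real ART definitions):

    #include <cassert>
    #include <cstdint>

    enum : int { kLastDexOpcode = 0xff, kMirOpFirst = 0x100 };  // illustrative values
    enum : uint32_t { kInvoke = 1u << 0 };

    static uint32_t FlagsOf(int opcode) {
      // Only valid for real Dalvik opcodes; indexing the real flags table with
      // an extended MIR opcode would read out of bounds.
      assert(opcode <= kLastDexOpcode);
      return (opcode == 0x6e /* invoke-virtual */) ? kInvoke : 0u;
    }

    struct DecodedInstruction {
      int opcode;
      static bool IsPseudoMirOp(int op) { return op >= kMirOpFirst; }
      bool IsInvoke() const {
        // The guard must come first: pseudo MIR ops have no flags entry.
        return !IsPseudoMirOp(opcode) && (FlagsOf(opcode) & kInvoke) == kInvoke;
      }
    };

    int main() {
      assert(DecodedInstruction{0x6e}.IsInvoke());
      assert(!DecodedInstruction{kMirOpFirst}.IsInvoke());  // Never reaches FlagsOf().
      return 0;
    }
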
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index dc1057f..869c48f 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -137,7 +137,7 @@
       break;
     }
     // Keep going if pseudo op, otherwise terminate
-    if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
       mir = AdvanceMIR(&tbb, mir);
     } else {
       mir = NULL;
@@ -877,7 +877,7 @@
           struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through);
           for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL;
             tmir = tmir->next) {
-            if (IsPseudoMirOp(tmir->dalvikInsn.opcode)) {
+            if (MIR::DecodedInstruction::IsPseudoMirOp(tmir->dalvikInsn.opcode)) {
               continue;
             }
             // First non-pseudo should be MOVE_RESULT_OBJECT
@@ -1220,7 +1220,7 @@
   iget_or_iput->meta.ifield_lowering_info = field_info_index;
 }
 
-bool MIRGraph::InlineCallsGate() {
+bool MIRGraph::InlineSpecialMethodsGate() {
   if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 ||
       method_lowering_infos_.Size() == 0u) {
     return false;
@@ -1232,7 +1232,7 @@
   return true;
 }
 
-void MIRGraph::InlineCallsStart() {
+void MIRGraph::InlineSpecialMethodsStart() {
   // Prepare for inlining getters/setters. Since we're inlining at most 1 IGET/IPUT from
   // each INVOKE, we can index the data by the MIR::meta::method_lowering_info index.
 
@@ -1246,12 +1246,12 @@
       temp_bit_vector_size_ * sizeof(*temp_insn_data_), kArenaAllocGrowableArray));
 }
 
-void MIRGraph::InlineCalls(BasicBlock* bb) {
+void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
   for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    if (IsPseudoMirOp(mir->dalvikInsn.opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
       continue;
     }
     if (!(Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke)) {
@@ -1270,17 +1270,17 @@
     MethodReference target = method_info.GetTargetMethod();
     if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(target.dex_file)
             ->GenInline(this, bb, mir, target.dex_method_index)) {
-      if (cu_->verbose) {
-        LOG(INFO) << "In \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
-            << "\" @0x" << std::hex << mir->offset
-            << " inlined " << method_info.GetInvokeType() << " (" << sharp_type << ") call to \""
-            << PrettyMethod(target.dex_method_index, *target.dex_file) << "\"";
+      if (cu_->verbose || cu_->print_pass) {
+        LOG(INFO) << "SpecialMethodInliner: Inlined " << method_info.GetInvokeType() << " ("
+            << sharp_type << ") call to \"" << PrettyMethod(target.dex_method_index, *target.dex_file)
+            << "\" from \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file)
+            << "\" @0x" << std::hex << mir->offset;
       }
     }
   }
 }
 
-void MIRGraph::InlineCallsEnd() {
+void MIRGraph::InlineSpecialMethodsEnd() {
   DCHECK(temp_insn_data_ != nullptr);
   temp_insn_data_ = nullptr;
   DCHECK(temp_bit_vector_ != nullptr);
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 4c9bed6..c72a4a6 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -35,7 +35,7 @@
 const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = {
   GetPassInstance<CacheFieldLoweringInfo>(),
   GetPassInstance<CacheMethodLoweringInfo>(),
-  GetPassInstance<CallInlining>(),
+  GetPassInstance<SpecialMethodInliner>(),
   GetPassInstance<CodeLayout>(),
   GetPassInstance<NullCheckEliminationAndTypeInference>(),
   GetPassInstance<ClassInitCheckElimination>(),
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 04d6898..6b96e92 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -190,7 +190,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow-path to reduce exception-related metadata.
-      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+      if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -218,7 +218,7 @@
 
     LIR* success_target = NewLIR0(kPseudoTargetLabel);
     lock_success_branch->target = success_target;
-    GenMemBarrier(kLoadLoad);
+    GenMemBarrier(kLoadAny);
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
@@ -240,7 +240,7 @@
     LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
     OpEndIT(it);
     MarkSafepointPC(call_inst);
-    GenMemBarrier(kLoadLoad);
+    GenMemBarrier(kLoadAny);
   }
 }
 
@@ -261,7 +261,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow-path to reduce exception-related metadata.
-      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+      if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -269,7 +269,7 @@
     MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
-    GenMemBarrier(kStoreLoad);
+    GenMemBarrier(kAnyStore);
     Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
@@ -298,7 +298,7 @@
     OpRegReg(kOpCmp, rs_r1, rs_r2);
 
     LIR* it = OpIT(kCondEq, "EE");
-    if (GenMemBarrier(kStoreLoad)) {
+    if (GenMemBarrier(kAnyStore)) {
       UpdateIT(it, "TEE");
     }
     Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
@@ -362,7 +362,7 @@
       Thread::kStackOverflowSignalReservedBytes;
   bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -401,7 +401,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
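
The two barrier changes in the lock and unlock paths above follow the usual acquire/release discipline: a kLoadAny barrier after the lock is taken keeps the critical section from floating above the acquisition, and a kAnyStore barrier before the lock word is cleared keeps it from floating below the release. A rough C++11 analogy using a spin lock (a sketch only; the real code emits ldrex/strex loops and dmb instructions):

    #include <atomic>

    std::atomic<int> monitor_word{0};

    void LockAnalogy() {
      int expected = 0;
      while (!monitor_word.compare_exchange_weak(expected, 1, std::memory_order_relaxed)) {
        expected = 0;  // compare_exchange_weak updates 'expected' on failure.
      }
      // Corresponds to GenMemBarrier(kLoadAny): nothing in the critical
      // section may be reordered before the successful acquisition.
      std::atomic_thread_fence(std::memory_order_acquire);
    }

    void UnlockAnalogy() {
      // Corresponds to GenMemBarrier(kAnyStore): all prior accesses must
      // become visible before the lock word is cleared.
      std::atomic_thread_fence(std::memory_order_release);
      monitor_word.store(0, std::memory_order_relaxed);
    }
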
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 43db24c..582af51 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -85,8 +85,6 @@
     size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
-    // Check support for volatile load/store of a given size.
-    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
     // Get the register class for load/store of a field.
     RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
@@ -198,6 +196,13 @@
     RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocPreservedSingle(int s_reg);
 
+    bool WideGPRsAreAliases() OVERRIDE {
+      return false;  // Wide GPRs are formed by pairing.
+    }
+    bool WideFPRsAreAliases() OVERRIDE {
+      return false;  // Wide FPRs are formed by pairing.
+    }
+
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 95071d9..2fcc3a5 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -341,7 +341,7 @@
  * is responsible for setting branch target field.
  */
 LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
-  LIR* branch;
+  LIR* branch = nullptr;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
   /*
    * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
@@ -354,14 +354,22 @@
    */
   bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
   skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
-  if (!skip && reg.Low8() && (check_value == 0) &&
-     ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
-    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
-                     reg.GetReg(), 0);
-  } else {
+  if (!skip && reg.Low8() && (check_value == 0)) {
+    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
+      branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
+                       reg.GetReg(), 0);
+    } else if (arm_cond == kArmCondLs) {
+      // kArmCondLs is unsigned lower-or-same. An unsigned comparison r <= 0 can only hold
+      // for r == 0, so it is the same as cbz. This case arises for a bounds check of array[0].
+      branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0);
+    }
+  }
+
+  if (branch == nullptr) {
     OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kThumbBCond, 0, arm_cond);
   }
+
   branch->target = target;
   return branch;
 }
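
The new kArmCondLs case rests on a small observation worth making explicit: under an unsigned interpretation, r <= 0 can only hold when r == 0, so "cmp r, #0; bls target" and "cbz r, target" branch identically. A trivial host-side check of the equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Sampled here, but the property holds for every unsigned value.
      for (uint32_t r = 0; r < 100000u; ++r) {
        bool lower_or_same = (r <= 0u);  // what b.ls tests after cmp r, #0
        bool is_zero = (r == 0u);        // what cbz tests
        assert(lower_or_same == is_zero);
      }
      return 0;
    }
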
@@ -764,6 +772,7 @@
   UNIMPLEMENTED(FATAL) << "Should not be called.";
 }
 
+// Generate a CAS with memory_order_seq_cst semantics.
 bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
@@ -818,8 +827,8 @@
     }
   }
 
-  // Release store semantics, get the barrier out of the way.  TODO: revisit
-  GenMemBarrier(kStoreLoad);
+  // Prevent reordering with prior memory operations.
+  GenMemBarrier(kAnyStore);
 
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
@@ -908,6 +917,9 @@
     FreeTemp(rl_expected.reg);  // Now unneeded.
   }
 
+  // Prevent reordering with subsequent memory operations.
+  GenMemBarrier(kLoadAny);
+
   // result := (tmp1 != 0) ? 0 : 1;
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
@@ -987,10 +999,10 @@
   int dmb_flavor;
   // TODO: revisit Arm barrier kinds
   switch (barrier_kind) {
-    case kLoadStore: dmb_flavor = kISH; break;
-    case kLoadLoad: dmb_flavor = kISH; break;
+    case kAnyStore: dmb_flavor = kISH; break;
+    case kLoadAny: dmb_flavor = kISH; break;
     case kStoreStore: dmb_flavor = kISHST; break;
-    case kStoreLoad: dmb_flavor = kISH; break;
+    case kAnyAny: dmb_flavor = kISH; break;
     default:
       LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
       dmb_flavor = kSY;  // quiet gcc.
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index ef94bbc..8cc7596 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -538,10 +538,6 @@
   return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
 }
 
-bool ArmMir2Lir::SupportsVolatileLoadStore(OpSize size) {
-  return true;
-}
-
 RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
   if (UNLIKELY(is_volatile)) {
     // On arm, atomic 64-bit load/store requires a core register pair.
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 2d5e291..9cbf7b8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -986,10 +986,7 @@
   }
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // Without context sensitive analysis, we must issue the most conservative barriers.
-    // In this case, either a load or store may follow so we issue both barriers.
-    GenMemBarrier(kLoadLoad);
-    GenMemBarrier(kLoadStore);
+    GenMemBarrier(kLoadAny);
   }
 
   return load;
@@ -1091,8 +1088,8 @@
 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size, VolatileKind is_volatile) {
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // There might have been a store before this volatile one so insert StoreStore barrier.
-    GenMemBarrier(kStoreStore);
+    // Ensure that prior accesses become visible to other threads first.
+    GenMemBarrier(kAnyStore);
   }
 
   LIR* store;
@@ -1135,8 +1132,9 @@
   }
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // A load might follow the volatile store so insert a StoreLoad barrier.
-    GenMemBarrier(kStoreLoad);
+    // Preserve order with respect to any subsequent volatile loads.
+    // We need StoreLoad, but that generally requires the most expensive barrier.
+    GenMemBarrier(kAnyAny);
   }
 
   return store;
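
Taken together, the changes above give volatile accesses a compact recipe: load, then kLoadAny; and kAnyStore, then store, then kAnyAny. Phrased with C++11 fences as an analogy (the backend emits dmb ish/ishst directly; the trailing kAnyAny supplies the StoreLoad ordering that sequentially consistent Java volatiles require but no release/acquire pairing provides):

    #include <atomic>

    int VolatileLoadAnalogy(const std::atomic<int>& addr) {
      int value = addr.load(std::memory_order_relaxed);     // the load itself
      std::atomic_thread_fence(std::memory_order_acquire);  // kLoadAny
      return value;
    }

    void VolatileStoreAnalogy(std::atomic<int>& addr, int value) {
      std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
      addr.store(value, std::memory_order_relaxed);         // the store itself
      std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny
    }
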
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 5077d11..d0633af 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -245,6 +245,7 @@
   kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
   kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
   kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Csinv4rrrc,    // csinv [s1011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
   kA64Csneg4rrrc,    // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
   kA64Dmb1B,         // dmb [11010101000000110011] CRm[11-8] [10111111].
   kA64Eor3Rrl,       // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
@@ -318,6 +319,7 @@
   kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
   kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
   kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+  kA64Smulh3xxx,     // smulh [10011011010] rm[20-16] [011111] rn[9-5] rd[4-0].
   kA64Stp4ffXD,      // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64Stp4rrXD,      // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
@@ -375,6 +377,7 @@
   kST = 0xe,
   kISH = 0xb,
   kISHST = 0xa,
+  kISHLD = 0x9,
   kNSH = 0x7,
   kNSHST = 0x6
 };
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index e10f7cf..462be54 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -105,7 +105,7 @@
 const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
     ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "adc", "!0r, !1r, !2r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
                  kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
@@ -113,7 +113,7 @@
                  "add", "!0R, !1R, #!2d!3T", kFixupNone),
     ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
                  "add", "!0r, !1r, !2r!3o", kFixupNone),
     ENCODING_MAP(WIDE(kA64Add4RRre), SF_VARIANTS(0x0b200000),
                  kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
@@ -204,6 +204,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
                  "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csinv4rrrc), SF_VARIANTS(0x5a800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csinv", "!0r, !1r, !2r, !3c", kFixupNone),
     ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
@@ -498,6 +502,10 @@
                  kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
                  kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
                  "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+    ENCODING_MAP(kA64Smulh3xxx, NO_VARIANTS(0x9b407c00),
+                 kFmtRegX, 4, 0, kFmtRegX, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "smulh", "!0x, !1x, !2x", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
                  kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
@@ -647,10 +655,10 @@
             if (kIsDebugBuild && (kFailOnSizeError || kReportSizeError)) {
               // Register usage checks: First establish register usage requirements based on the
               // format in `kind'.
-              bool want_float = false;
-              bool want_64_bit = false;
-              bool want_var_size = true;
-              bool want_zero = false;
+              bool want_float = false;     // Want a float (rather than core) register.
+              bool want_64_bit = false;    // Want a 64-bit (rather than 32-bit) register.
+              bool want_var_size = true;   // Want register with variable size (kFmtReg{R,F}).
+              bool want_zero = false;      // Want the zero (rather than sp) register.
               switch (kind) {
                 case kFmtRegX:
                   want_64_bit = true;
@@ -709,9 +717,6 @@
                 }
               }
 
-              // TODO(Arm64): if !want_size_match, then we still should compare the size of the
-              //   register with the size required by the instruction width (kA64Wide).
-
               // Fail, if `expected' contains an unsatisfied requirement.
               if (expected != nullptr) {
                 LOG(WARNING) << "Method: " << PrettyMethod(cu_->method_idx, *cu_->dex_file)
@@ -726,11 +731,12 @@
               }
             }
 
-            // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are
-            //   defined.
-            if (is_zero) {
-              operand = 31;
-            }
+            // In the lines below, we rely on (operand & 0x1f) == 31 holding for both sp and
+            // zr. Neither register needs special treatment: the bottom five bits of their
+            // register numbers are already 0b11111, which is exactly the field value that
+            // encodes either of them.
+            COMPILE_ASSERT((rxzr & 0x1f) == 0x1f, rzr_register_number_must_be_31);
+            COMPILE_ASSERT((rsp & 0x1f) == 0x1f, rsp_register_number_must_be_31);
           }
 
           value = (operand << encoder->field_loc[i].start) &
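
The two COMPILE_ASSERTs added above capture an AArch64 encoding quirk: sp and zr share register number 31, and the instruction kind decides which one is meant, so masking the operand with 0x1f is already correct for both. A standalone restatement (kRegSp and kRegXzr are made-up stand-ins for ART's rsp/rxzr constants):

    #include <cstdint>

    constexpr int kRegSp  = 0x3f;  // hypothetical internal code for sp
    constexpr int kRegXzr = 0x7f;  // hypothetical internal code for xzr

    // Both must put 0b11111 in their low five bits, as the COMPILE_ASSERTs demand.
    static_assert((kRegSp & 0x1f) == 0x1f, "sp must encode as register 31");
    static_assert((kRegXzr & 0x1f) == 0x1f, "zr must encode as register 31");

    // Masking with 0x1f then yields the right 5-bit field with no special case.
    constexpr uint32_t EncodeRn(int reg) { return static_cast<uint32_t>(reg & 0x1f) << 5; }
    static_assert(EncodeRn(kRegSp) == EncodeRn(kRegXzr), "same field for sp and zr");

    int main() { return 0; }
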
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 56dcbe5..d946ee3 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -195,14 +195,14 @@
   // TUNING: How much performance do we gain when we inline this,
   // given that we have already flushed all registers?
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetRefReg(kArg0)
+  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetReg(kArg0, kRef)
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
   if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails it's handled by the slow-path to reduce exception-related metadata.
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -228,7 +228,7 @@
 
   LIR* success_target = NewLIR0(kPseudoTargetLabel);
   lock_success_branch->target = success_target;
-  GenMemBarrier(kLoadLoad);
+  GenMemBarrier(kLoadAny);
 }
 
 /*
@@ -250,7 +250,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails it's handled by the slow-path to reduce exception-related metadata.
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -258,7 +258,7 @@
   Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
   MarkPossibleNullPointerException(opt_flags);
   LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
-  GenMemBarrier(kStoreLoad);
+  GenMemBarrier(kAnyStore);
   Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr);
   LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
@@ -338,7 +338,7 @@
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
         LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
@@ -371,7 +371,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 7d75da9..f51145c 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -65,8 +65,12 @@
     // Required for target - codegen helpers.
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
                             RegLocation rl_dest, int lit) OVERRIDE;
+    bool SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                              RegLocation rl_dest, int64_t lit);
     bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                           RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
+                            RegLocation rl_src, RegLocation rl_dest, int64_t lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
@@ -96,25 +100,19 @@
                               RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
     LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                           int offset, int check_value, LIR* target) OVERRIDE;
+                           int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
-    RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE {
-      RegStorage reg = TargetReg(symbolic_reg);
-      if (is_wide) {
-        return (reg.Is64Bit()) ? reg : As64BitReg(reg);
+    RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE {
+      if (wide_kind == kWide || wide_kind == kRef) {
+        return As64BitReg(TargetReg(symbolic_reg));
       } else {
-        return (reg.Is32Bit()) ? reg : As32BitReg(reg);
+        return Check32BitReg(TargetReg(symbolic_reg));
       }
     }
-    RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-      RegStorage reg = TargetReg(symbolic_reg);
-      return (reg.Is64Bit() ? reg : As64BitReg(reg));
-    }
     RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-      RegStorage reg = TargetReg(symbolic_reg);
-      return (reg.Is64Bit() ? reg : As64BitReg(reg));
+      return As64BitReg(TargetReg(symbolic_reg));
     }
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -148,8 +146,6 @@
     size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
-    // Check support for volatile load/store of a given size.
-    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
     // Get the register class for load/store of a field.
     RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
@@ -298,6 +294,13 @@
                            bool skip_this);
     InToRegStorageMapping in_to_reg_storage_mapping_;
 
+    bool WideGPRsAreAliases() OVERRIDE {
+      return true;  // 64b architecture.
+    }
+    bool WideFPRsAreAliases() OVERRIDE {
+      return true;  // 64b architecture.
+    }
+
   private:
     /**
      * @brief Given register xNN (dNN), returns register wNN (sNN).
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e8f5cb9..2b78e81 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -91,17 +91,134 @@
   RegLocation rl_dest = mir_graph_->GetDest(mir);
   RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
   RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+
   rl_src = LoadValue(rl_src, src_reg_class);
+  // rl_src may be aliased with rl_result/rl_dest, so do the compare early.
+  OpRegImm(kOpCmp, rl_src.reg, 0);
+
   ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
 
-  RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
-  RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-  rl_true = LoadValue(rl_true, result_reg_class);
-  rl_false = LoadValue(rl_false, result_reg_class);
-  rl_result = EvalLoc(rl_dest, result_reg_class, true);
-  OpRegImm(kOpCmp, rl_src.reg, 0);
-  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
-          rl_false.reg.GetReg(), code);
+  // kMirOpSelect has two variants, one for constants and one for moves.
+  bool is_wide = rl_dest.ref || rl_dest.wide;
+
+  if (mir->ssa_rep->num_uses == 1) {
+    uint32_t true_val = mir->dalvikInsn.vB;
+    uint32_t false_val = mir->dalvikInsn.vC;
+
+    int opcode;             // The opcode.
+    int left_op, right_op;  // The operands.
+    bool rl_result_evaled = false;
+
+    // Check some simple cases.
+    // TODO: Improve this.
+    int zero_reg = (is_wide ? rs_xzr : rs_wzr).GetReg();
+
+    if ((true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0)) {
+      // CSInc cheap based on wzr.
+      if (true_val == 1) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+
+      left_op = right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+    } else if ((true_val == 0 && false_val == 0xFFFFFFFF) ||
+               (true_val == 0xFFFFFFFF && false_val == 0)) {
+      // Csinv cheap based on wzr.
+      if (true_val == 0xFFFFFFFF) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+
+      left_op = right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinv4rrrc) : kA64Csinv4rrrc;
+    } else if (true_val == 0 || false_val == 0) {
+      // Csel half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (false_val == 0) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 0 ? false_val : true_val);
+      left_op = zero_reg;
+      right_op = rl_result.reg.GetReg();
+      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    } else if (true_val == 1 || false_val == 1) {
+      // CSInc half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (true_val == 1) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 1 ? false_val : true_val);
+      left_op = rl_result.reg.GetReg();
+      right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+    } else if (true_val == 0xFFFFFFFF || false_val == 0xFFFFFFFF) {
+      // Csinv half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (true_val == 0xFFFFFFFF) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 0xFFFFFFFF ? false_val : true_val);
+      left_op = rl_result.reg.GetReg();
+      right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinv4rrrc) : kA64Csinv4rrrc;
+    } else if ((true_val + 1 == false_val) || (false_val + 1 == true_val)) {
+      // Load a constant and use CSinc. Use rl_result.
+      if (false_val + 1 == true_val) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+        true_val = false_val;
+      }
+
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      LoadConstantNoClobber(rl_result.reg, true_val);
+      left_op = right_op = rl_result.reg.GetReg();
+      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+    } else {
+      // Csel handles the rest. Use rl_result and a temp.
+      // TODO: To minimize the constants being loaded, check whether one can be inexpensively
+      //       loaded as n - 1 or ~n.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      LoadConstantNoClobber(rl_result.reg, true_val);
+      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
+      if (rl_dest.wide) {
+        if (t_reg2.Is32Bit()) {
+          t_reg2 = As64BitReg(t_reg2);
+        }
+      }
+      LoadConstantNoClobber(t_reg2, false_val);
+
+      // Use csel.
+      left_op = rl_result.reg.GetReg();
+      right_op = t_reg2.GetReg();
+      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    }
+
+    if (!rl_result_evaled) {
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+    }
+
+    NewLIR4(opcode, rl_result.reg.GetReg(), left_op, right_op, code);
+  } else {
+    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
+
+    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    NewLIR4(opcode, rl_result.reg.GetReg(),
+            rl_true.reg.GetReg(), rl_false.reg.GetReg(), code);
+  }
   StoreValue(rl_dest, rl_result);
 }
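
The constant variant above picks the cheapest conditional-select form by pattern-matching the (true_val, false_val) pair. A small software model of the relevant AArch64 semantics (csel = cond ? rn : rm, csinc = cond ? rn : rm + 1, csinv = cond ? rn : ~rm) shows why the pairs (0, 1) and (0, ~0) need no materialized constants at all, and why adjacent constants need only one:

    #include <cassert>
    #include <cstdint>

    uint32_t Csel(bool cond, uint32_t rn, uint32_t rm)  { return cond ? rn : rm; }
    uint32_t Csinc(bool cond, uint32_t rn, uint32_t rm) { return cond ? rn : rm + 1; }
    uint32_t Csinv(bool cond, uint32_t rn, uint32_t rm) { return cond ? rn : ~rm; }

    int main() {
      const uint32_t wzr = 0;  // the zero register
      for (bool cond : {false, true}) {
        // (true_val, false_val) == (0, 1): csinc wzr, wzr.
        assert(Csinc(cond, wzr, wzr) == (cond ? 0u : 1u));
        // (true_val, false_val) == (0, 0xFFFFFFFF): csinv wzr, wzr.
        assert(Csinv(cond, wzr, wzr) == (cond ? 0u : 0xFFFFFFFFu));
        // Adjacent constants n and n + 1: load n once, then csinc reg, reg.
        const uint32_t n = 41;
        assert(Csinc(cond, n, n) == (cond ? n : n + 1));
      }
      return 0;
    }
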
 
@@ -110,7 +227,6 @@
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   LIR* taken = &block_label_list_[bb->taken];
   LIR* not_taken = &block_label_list_[bb->fall_through];
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   // Normalize such that if either operand is constant, src2 will be constant.
   ConditionCode ccode = mir->meta.ccode;
   if (rl_src1.is_const) {
@@ -118,16 +234,22 @@
     ccode = FlipComparisonOrder(ccode);
   }
 
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+
   if (rl_src2.is_const) {
-    rl_src2 = UpdateLocWide(rl_src2);
+    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
+
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
     // Special handling using cbz & cbnz.
     if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
       OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
       OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
       return;
+    }
+
     // Only handle Imm if src2 is not already in a register.
-    } else if (rl_src2.location != kLocPhysReg) {
+    rl_src2 = UpdateLocWide(rl_src2);
+    if (rl_src2.location != kLocPhysReg) {
       OpRegImm64(kOpCmp, rl_src1.reg, val);
       OpCondBranch(ccode, taken);
       OpCondBranch(NegateComparison(ccode), not_taken);
@@ -147,23 +269,35 @@
  */
 LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
                                   LIR* target) {
-  LIR* branch;
+  LIR* branch = nullptr;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
-    ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
-    ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
-    branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
-  } else {
+  if (check_value == 0) {
+    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
+      ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+    } else if (arm_cond == kArmCondLs) {
+      // kArmCondLs is unsigned lower-or-same. An unsigned comparison r <= 0 can only hold
+      // for r == 0, so it is the same as cbz. This case arises for a bounds check of array[0].
+      ArmOpcode opcode = kA64Cbz2rt;
+      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+    }
+  }
+
+  if (branch == nullptr) {
     OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kA64B2ct, arm_cond, 0);
   }
+
   branch->target = target;
   return branch;
 }
 
 LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
                                      RegStorage base_reg, int offset, int check_value,
-                                     LIR* target) {
+                                     LIR* target, LIR** compare) {
+  DCHECK(compare == nullptr);
   // It is possible that temp register is 64-bit. (ArgReg or RefReg)
   // Always compare 32-bit value no matter what temp_reg is.
   if (temp_reg.Is64Bit()) {
@@ -248,28 +382,31 @@
 
 // Table of magic divisors
 struct MagicTable {
-  uint32_t magic;
+  int magic64_base;
+  int magic64_eor;
+  uint64_t magic64;
+  uint32_t magic32;
   uint32_t shift;
   DividePattern pattern;
 };
 
 static const MagicTable magic_table[] = {
-  {0, 0, DivideNone},        // 0
-  {0, 0, DivideNone},        // 1
-  {0, 0, DivideNone},        // 2
-  {0x55555556, 0, Divide3},  // 3
-  {0, 0, DivideNone},        // 4
-  {0x66666667, 1, Divide5},  // 5
-  {0x2AAAAAAB, 0, Divide3},  // 6
-  {0x92492493, 2, Divide7},  // 7
-  {0, 0, DivideNone},        // 8
-  {0x38E38E39, 1, Divide5},  // 9
-  {0x66666667, 2, Divide5},  // 10
-  {0x2E8BA2E9, 1, Divide5},  // 11
-  {0x2AAAAAAB, 1, Divide5},  // 12
-  {0x4EC4EC4F, 2, Divide5},  // 13
-  {0x92492493, 3, Divide7},  // 14
-  {0x88888889, 3, Divide7},  // 15
+  {   0,      0,                  0,          0, 0, DivideNone},  // 0
+  {   0,      0,                  0,          0, 0, DivideNone},  // 1
+  {   0,      0,                  0,          0, 0, DivideNone},  // 2
+  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
+  {   0,      0,                  0,          0, 0, DivideNone},  // 4
+  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
+  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
+  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
+  {   0,      0,                  0,          0, 0, DivideNone},  // 8
+  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
+  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
+  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
+  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
+  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
+  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
+  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
 };
 
 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
@@ -288,7 +425,7 @@
   }
 
   RegStorage r_magic = AllocTemp();
-  LoadConstant(r_magic, magic_table[lit].magic);
+  LoadConstant(r_magic, magic_table[lit].magic32);
   rl_src = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage r_long_mul = AllocTemp();
@@ -317,49 +454,144 @@
   return true;
 }
 
+bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
+                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
+  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
+    return false;
+  }
+  DividePattern pattern = magic_table[lit].pattern;
+  if (pattern == DivideNone) {
+    return false;
+  }
+  // Tuning: add rem patterns
+  if (!is_div) {
+    return false;
+  }
+
+  RegStorage r_magic = AllocTempWide();
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  RegStorage r_long_mul = AllocTempWide();
+
+  if (magic_table[lit].magic64_base >= 0) {
+    // Check that the entry in the table is correct.
+    if (kIsDebugBuild) {
+      uint64_t reconstructed_imm;
+      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
+      if (magic_table[lit].magic64_eor >= 0) {
+        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
+        reconstructed_imm = base ^ eor;
+      } else {
+        reconstructed_imm = base + 1;
+      }
+      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
+    }
+
+    // Load the magic constant in two instructions.
+    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
+    if (magic_table[lit].magic64_eor >= 0) {
+      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
+              magic_table[lit].magic64_eor);
+    } else {
+      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
+    }
+  } else {
+    LoadConstantWide(r_magic, magic_table[lit].magic64);
+  }
+
+  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
+  switch (pattern) {
+    case Divide3:
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
+      break;
+    case Divide5:
+      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
+      break;
+    case Divide7:
+      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
+      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected pattern: " << pattern;
+  }
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
 // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
 // and store the result in 'rl_dest'.
 bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
+  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
+}
+
+// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
+// and store the result in 'rl_dest'.
+bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
+                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
+  const bool is_64bit = rl_dest.wide;
+  const int nbits = (is_64bit) ? 64 : 32;
+
   if (lit < 2) {
     return false;
   }
   if (!IsPowerOfTwo(lit)) {
-    return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
+    if (is_64bit) {
+      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
+    } else {
+      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
+    }
   }
   int k = LowestSetBit(lit);
-  if (k >= 30) {
+  if (k >= nbits - 2) {
     // Avoid special cases.
     return false;
   }
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  RegLocation rl_result;
+  RegStorage t_reg;
+  if (is_64bit) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    t_reg = AllocTempWide();
+  } else {
+    rl_src = LoadValue(rl_src, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    t_reg = AllocTemp();
+  }
+
+  int shift = EncodeShift(kA64Lsr, nbits - k);
   if (is_div) {
-    RegStorage t_reg = AllocTemp();
     if (lit == 2) {
       // Division by 2 is by far the most common division by constant.
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
       OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
     } else {
-      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
       OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
     }
   } else {
-    RegStorage t_reg = AllocTemp();
     if (lit == 2) {
-      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
-      OpRegRegImm(kOpAnd, t_reg, t_reg, lit - 1);
-      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
+      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
+      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
     } else {
-      RegStorage t_reg2 = AllocTemp();
-      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
-      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
-      OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1);
-      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, EncodeShift(kA64Lsr, 32 - k));
+      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
+      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
+      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
+      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
+      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
     }
   }
-  StoreValue(rl_dest, rl_result);
+
+  if (is_64bit) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    StoreValue(rl_dest, rl_result);
+  }
   return true;
 }
 
@@ -368,12 +600,6 @@
   return false;
 }
 
-RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                    RegLocation rl_src2, bool is_div, bool check_zero) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
-  return rl_dest;
-}
-
 RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
   return rl_dest;
@@ -393,8 +619,14 @@
   return rl_result;
 }
 
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                    RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
+  return rl_dest;
+}
+
 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
-                                  bool is_div) {
+                                    bool is_div) {
   CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
 
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -631,10 +863,16 @@
   int dmb_flavor;
   // TODO: revisit Arm barrier kinds
   switch (barrier_kind) {
-    case kLoadStore: dmb_flavor = kISH; break;
-    case kLoadLoad: dmb_flavor = kISH; break;
+    case kAnyStore: dmb_flavor = kISH; break;
+    case kLoadAny: dmb_flavor = kISH; break;
+        // We conjecture that kISHLD is insufficient: it is documented to
+        // provide only LoadLoad | StoreStore ordering. If it were used to
+        // implement volatile loads, we suspect the lack of store atomicity
+        // on ARM would allow incorrect results for the canonical IRIW
+        // example, though we are not certain. We should be using acquire
+        // loads instead.
     case kStoreStore: dmb_flavor = kISHST; break;
-    case kStoreLoad: dmb_flavor = kISH; break;
+    case kAnyAny: dmb_flavor = kISH; break;
     default:
       LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
       dmb_flavor = kSY;  // quiet gcc.
@@ -670,6 +908,14 @@
 
 void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
                                  RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
+  if (rl_src2.is_const) {
+    DCHECK(rl_src2.wide);
+    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
+    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
+      return;
+    }
+  }
+
   RegLocation rl_result;
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
@@ -935,7 +1181,7 @@
 }
 
 void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
   OpKind op = kOpBkpt;
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
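
The magic64 entries and the Divide3/Divide5/Divide7 patterns in SmallLiteralDivRem64 above can be checked on the host by modeling smulh (the high 64 bits of a signed 64x64-bit multiply) with the GCC/Clang __int128 extension. A quick verification sketch; the constants are copied from magic_table, the helper names are ours, and it assumes arithmetic right shift of negative values (true for GCC/Clang):

    #include <cassert>
    #include <cstdint>

    static int64_t Smulh(int64_t a, int64_t b) {
      return static_cast<int64_t>((static_cast<__int128>(a) * b) >> 64);
    }

    static int64_t DivideBy3(int64_t n) {  // Divide3: sub result, mul, n, asr #63
      return Smulh(0x5555555555555556, n) - (n >> 63);
    }

    static int64_t DivideBy5(int64_t n) {  // Divide5: extra asr by the table shift (1)
      return (Smulh(0x6666666666666667, n) >> 1) - (n >> 63);
    }

    static int64_t DivideBy7(int64_t n) {  // Divide7: add the dividend back first
      int64_t mul = Smulh(static_cast<int64_t>(0x924924924924924A), n);
      return ((n + mul) >> 2) - (n >> 63);
    }

    int main() {
      for (int64_t n = -1000; n <= 1000; ++n) {
        assert(DivideBy3(n) == n / 3);
        assert(DivideBy5(n) == n / 5);
        assert(DivideBy7(n) == n / 7);
      }
      return 0;
    }
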
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index ef9dbdd..f1dc77a 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -108,19 +108,19 @@
 RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
-    case kSelf: res_reg = rs_xSELF; break;
-    case kSuspend: res_reg = rs_xSUSPEND; break;
-    case kLr: res_reg =  rs_xLR; break;
+    case kSelf: res_reg = rs_wSELF; break;
+    case kSuspend: res_reg = rs_wSUSPEND; break;
+    case kLr: res_reg =  rs_wLR; break;
     case kPc: res_reg = RegStorage::InvalidReg(); break;
-    case kSp: res_reg =  rs_sp; break;
-    case kArg0: res_reg = rs_x0; break;
-    case kArg1: res_reg = rs_x1; break;
-    case kArg2: res_reg = rs_x2; break;
-    case kArg3: res_reg = rs_x3; break;
-    case kArg4: res_reg = rs_x4; break;
-    case kArg5: res_reg = rs_x5; break;
-    case kArg6: res_reg = rs_x6; break;
-    case kArg7: res_reg = rs_x7; break;
+    case kSp: res_reg =  rs_wsp; break;
+    case kArg0: res_reg = rs_w0; break;
+    case kArg1: res_reg = rs_w1; break;
+    case kArg2: res_reg = rs_w2; break;
+    case kArg3: res_reg = rs_w3; break;
+    case kArg4: res_reg = rs_w4; break;
+    case kArg5: res_reg = rs_w5; break;
+    case kArg6: res_reg = rs_w6; break;
+    case kArg7: res_reg = rs_w7; break;
     case kFArg0: res_reg = rs_f0; break;
     case kFArg1: res_reg = rs_f1; break;
     case kFArg2: res_reg = rs_f2; break;
@@ -129,10 +129,10 @@
     case kFArg5: res_reg = rs_f5; break;
     case kFArg6: res_reg = rs_f6; break;
     case kFArg7: res_reg = rs_f7; break;
-    case kRet0: res_reg = rs_x0; break;
-    case kRet1: res_reg = rs_x1; break;
-    case kInvokeTgt: res_reg = rs_xLR; break;
-    case kHiddenArg: res_reg = rs_x12; break;
+    case kRet0: res_reg = rs_w0; break;
+    case kRet1: res_reg = rs_w1; break;
+    case kInvokeTgt: res_reg = rs_wLR; break;
+    case kHiddenArg: res_reg = rs_w12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
     default: res_reg = RegStorage::InvalidReg();
@@ -158,7 +158,9 @@
 }
 
 ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const {
-  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
+  // Note: On arm64, pc can only be set by branch instructions, which are treated as a kind of
+  //       barrier. All other instructions only read pc, so there is no dependency between any
+  //       of them, and it is fine to just return kEncodeNone here.
   return kEncodeNone;
 }
 
@@ -168,6 +170,7 @@
   DCHECK_EQ(cu_->instruction_set, kArm64);
   DCHECK(!lir->flags.use_def_invalid);
 
+  // Note: REG_USE_PC is ignored, for the same reason as in GetPCUseDefEncoding().
   // These flags are somewhat uncommon - bypass if we can.
   if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
     if (flags & REG_DEF_SP) {
@@ -566,10 +569,6 @@
   return (lir->opcode == kA64B1t);
 }
 
-bool Arm64Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
-  return true;
-}
-
 RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
   if (UNLIKELY(is_volatile)) {
     // On arm64, fp register load/store is atomic only for single bytes.
@@ -926,13 +925,13 @@
    */
   RegLocation rl_src = rl_method;
   rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0);
+  rl_src.reg = TargetReg(kArg0, kRef);
   rl_src.home = false;
   MarkLive(rl_src);
   StoreValue(rl_method, rl_src);
   // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile);
+    StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile);
   }
 
   if (cu_->num_ins == 0) {
@@ -958,9 +957,9 @@
       } else {
         // Needs flush.
         if (t_loc->ref) {
-          StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
+          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
         } else {
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
+          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
               kNotVolatile);
         }
       }
@@ -968,9 +967,9 @@
       // If arriving in frame & promoted.
       if (t_loc->location == kLocPhysReg) {
         if (t_loc->ref) {
-          LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
+          LoadRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
         } else {
-          LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
+          LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
                        t_loc->wide ? k64 : k32, kNotVolatile);
         }
       }
@@ -1067,7 +1066,7 @@
         loc = UpdateLocWide(loc);
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
+          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
         }
         next_arg += 2;
       } else {
@@ -1075,9 +1074,10 @@
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           if (loc.ref) {
-            StoreRefDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
           } else {
-            StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
+                          kNotVolatile);
           }
         }
         next_arg++;
@@ -1108,11 +1108,11 @@
 
       // Instead of allocating a new temp, simply reuse one of the registers being used
       // for argument passing.
-      RegStorage temp = TargetReg(kArg3, false);
+      RegStorage temp = TargetReg(kArg3, kNotWide);
 
       // Now load the argument VR and store to the outs.
-      Load32Disp(TargetReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
+      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
 
       current_src_offset += bytes_to_move;
       current_dest_offset += bytes_to_move;
@@ -1123,8 +1123,7 @@
 
   // Now handle the rest that are not in registers, if any.
   if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regSingle = TargetReg(kArg2);
-    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    RegStorage regWide = TargetReg(kArg3, kWide);
     for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
       RegLocation rl_arg = info->args[i];
       rl_arg = UpdateRawLoc(rl_arg);
@@ -1136,25 +1135,27 @@
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           if (rl_arg.wide) {
             if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
             } else {
               LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile);
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
             }
           } else {
             if (rl_arg.location == kLocPhysReg) {
               if (rl_arg.ref) {
-                StoreRefDisp(TargetReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
               } else {
-                StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
               }
             } else {
               if (rl_arg.ref) {
+                RegStorage regSingle = TargetReg(kArg2, kRef);
                 LoadValueDirectFixed(rl_arg, regSingle);
-                StoreRefDisp(TargetReg(kSp), out_offset, regSingle, kNotVolatile);
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
               } else {
-                LoadValueDirectFixed(rl_arg, As32BitReg(regSingle));
-                StoreBaseDisp(TargetReg(kSp), out_offset, As32BitReg(regSingle), k32, kNotVolatile);
+                RegStorage regSingle = TargetReg(kArg2, kNotWide);
+                LoadValueDirectFixed(rl_arg, regSingle);
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
               }
             }
           }
@@ -1190,14 +1191,14 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 22a4ec4..fdebb92 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -488,7 +488,9 @@
       break;
     case kOpRevsh:
       // Binary, but rm is encoded twice.
-      return NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
+      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
+      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
       break;
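+      // Worked example (illustrative): w2 = 0x0000ABCD. rev16 byte-swaps each
+      // halfword, leaving 0xCDAB in the low half; sbfm #0, #15 then
+      // sign-extends bit 15, giving 0xFFFFCDAB -- exactly revsh.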
     case kOp2Byte:
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
@@ -1145,10 +1147,8 @@
   LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // Without context sensitive analysis, we must issue the most conservative barriers.
-    // In this case, either a load or store may follow so we issue both barriers.
-    GenMemBarrier(kLoadLoad);
-    GenMemBarrier(kLoadStore);
+    // TODO: This should generate an acquire load instead of the barrier.
+    GenMemBarrier(kLoadAny);
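+    // (Sketch) The acquire load the TODO refers to would fold the barrier
+    // into the access itself, e.g. on ARMv8:
+    //   ldar w0, [x1]   // later loads/stores cannot be reordered above this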
   }
 
   return load;
@@ -1232,9 +1232,10 @@
 
 LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                  OpSize size, VolatileKind is_volatile) {
+  // TODO: This should generate a release store and no barriers.
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // There might have been a store before this volatile one so insert StoreStore barrier.
-    GenMemBarrier(kStoreStore);
+    // Ensure that prior accesses become visible to other threads first.
+    GenMemBarrier(kAnyStore);
   }
 
   // StoreBaseDisp() will emit correct insn for atomic store on arm64
@@ -1243,8 +1244,9 @@
   LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // A load might follow the volatile store so insert a StoreLoad barrier.
-    GenMemBarrier(kStoreLoad);
+    // Preserve order with respect to any subsequent volatile loads.
+    // We need StoreLoad, but that generally requires the most expensive barrier.
+    GenMemBarrier(kAnyAny);
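+    // (Sketch) With a release store, e.g. "stlr w0, [x1]", prior accesses
+    // cannot sink below the store; paired with ldar on the load side this is
+    // believed strong enough that neither explicit dmb would be needed, which
+    // is what the TODO above alludes to.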
   }
 
   return store;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22..463f277 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1046,9 +1046,19 @@
     }
     // Push a marker to take the place of lr.
     vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-    // fp regs already sorted.
-    for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
-      vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+    if (cu_->instruction_set == kThumb2) {
+      // fp regs already sorted.
+      for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
+        vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+      }
+    } else {
+      // For other platforms, regs may have been inserted out of order - sort first.
+      std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end());
+      for (size_t i = 0; i < fp_vmap_table_.size(); ++i) {
+        // Copy, stripping out the phys register sort key.
+        vmap_encoder.PushBackUnsigned(
+            ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
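+        // E.g., assuming VREG_NUM_WIDTH is 16, the mask is 0xFFFF: an entry
+        // 0x002A0005 (sort key 0x2A in the high bits, Dalvik vreg 5 below)
+        // sorts by physical register but encodes only 5 + kEntryAdjustment.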
+      }
     }
   } else {
     DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
@@ -1162,9 +1172,12 @@
 }
 
 LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                int offset, int check_value, LIR* target) {
+                                int offset, int check_value, LIR* target, LIR** compare) {
   // Handle this for architectures that can't compare to memory.
-  Load32Disp(base_reg, offset, temp_reg);
+  LIR* inst = Load32Disp(base_reg, offset, temp_reg);
+  if (compare != nullptr) {
+    *compare = inst;
+  }
   LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
   return branch;
 }
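+// (Illustrative) A caller that needs the emitted load, e.g. to annotate it,
+// can now capture it through the new out-parameter:
+//   LIR* load_inst = nullptr;
+//   OpCmpMemImmBranch(kCondLt, r_tmp, r_base, offset, value, target, &load_inst);
+//   // load_inst points at the Load32Disp emitted above.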
@@ -1202,7 +1215,7 @@
     data_target->operands[2] = type;
   }
   // Loads an ArtMethod pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   AppendLIR(load_pc_rel);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
 }
@@ -1214,7 +1227,7 @@
     data_target = AddWordData(&class_literal_list_, type_idx);
   }
   // Loads a Class pointer, which is a reference as it lives in the heap.
-  LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   AppendLIR(load_pc_rel);
 }
 
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 6191e4b..0e46c96 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -48,6 +48,7 @@
     true,   // kIntrinsicMinMaxFloat
     true,   // kIntrinsicMinMaxDouble
     true,   // kIntrinsicSqrt
+    false,  // kIntrinsicGet
     false,  // kIntrinsicCharAt
     false,  // kIntrinsicCompareTo
     false,  // kIntrinsicIsEmptyOrLength
@@ -74,6 +75,7 @@
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], MinMaxFloat_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], MinMaxDouble_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicSqrt], Sqrt_must_be_static);
+COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicGet], Get_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCharAt], CharAt_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCompareTo], CompareTo_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], IsEmptyOrLength_must_not_be_static);
@@ -96,7 +98,7 @@
 
 uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg, invoke->dalvikInsn.vA);
-  DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
   if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) {
    return invoke->dalvikInsn.vC + arg;  // Range invoke.
   } else {
@@ -107,7 +109,7 @@
 
 bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg + 1, invoke->dalvikInsn.vA);
-  DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
   return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc ||
       invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u;
 }
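+// (Illustrative) For invoke-virtual/range {v4 .. v6} (format k3rc), vC == 4,
+// so arg 1 maps to v5; for invoke-virtual {v7, v3} (format k35c), the register
+// comes from the arg[] array directly, arg[1] == 3.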
@@ -126,6 +128,7 @@
     "D",                       // kClassCacheDouble
     "V",                       // kClassCacheVoid
     "Ljava/lang/Object;",      // kClassCacheJavaLangObject
+    "Ljava/lang/ref/Reference;",  // kClassCacheJavaLangRefReference
     "Ljava/lang/String;",      // kClassCacheJavaLangString
     "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
     "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
@@ -152,6 +155,7 @@
     "max",                   // kNameCacheMax
     "min",                   // kNameCacheMin
     "sqrt",                  // kNameCacheSqrt
+    "get",                   // kNameCacheGet
     "charAt",                // kNameCacheCharAt
     "compareTo",             // kNameCacheCompareTo
     "isEmpty",               // kNameCacheIsEmpty
@@ -220,6 +224,8 @@
     { kClassCacheBoolean, 0, { } },
     // kProtoCache_I
     { kClassCacheInt, 0, { } },
+    // kProtoCache_Object
+    { kClassCacheJavaLangObject, 0, { } },
     // kProtoCache_Thread
     { kClassCacheJavaLangThread, 0, { } },
     // kProtoCacheJ_B
@@ -308,6 +314,8 @@
     INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
     INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
 
+    INTRINSIC(JavaLangRefReference, Get, _Object, kIntrinsicGet, 0),
+
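+    // (Illustrative) The triple resolves against the caches above: class
+    // "Ljava/lang/ref/Reference;", name "get", proto "()Ljava/lang/Object;",
+    // i.e. this row intercepts java.lang.ref.Reference.get().
+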
     INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
     INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
     INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
@@ -428,6 +436,8 @@
       return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */);
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
+    case kIntrinsicGet:
+      return backend->GenInlinedGet(info);
     case kIntrinsicCharAt:
       return backend->GenInlinedCharAt(info);
     case kIntrinsicCompareTo:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 5b3b104..cb8c165 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -107,6 +107,7 @@
       kClassCacheDouble,
       kClassCacheVoid,
       kClassCacheJavaLangObject,
+      kClassCacheJavaLangRefReference,
       kClassCacheJavaLangString,
       kClassCacheJavaLangDouble,
       kClassCacheJavaLangFloat,
@@ -140,6 +141,7 @@
       kNameCacheMax,
       kNameCacheMin,
       kNameCacheSqrt,
+      kNameCacheGet,
       kNameCacheCharAt,
       kNameCacheCompareTo,
       kNameCacheIsEmpty,
@@ -199,6 +201,7 @@
       kProtoCacheString_I,
       kProtoCache_Z,
       kProtoCache_I,
+      kProtoCache_Object,
       kProtoCache_Thread,
       kProtoCacheJ_B,
       kProtoCacheJ_I,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 6dc019a..502859a 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -127,8 +127,8 @@
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
 
-      RegStorage arg1_32 = m2l_->TargetReg(kArg1, false);
-      RegStorage arg0_32 = m2l_->TargetReg(kArg0, false);
+      RegStorage arg1_32 = m2l_->TargetReg(kArg1, kNotWide);
+      RegStorage arg0_32 = m2l_->TargetReg(kArg0, kNotWide);
 
       m2l_->OpRegCopy(arg1_32, length_);
       m2l_->LoadConstant(arg0_32, index_);
@@ -176,7 +176,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
     return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
@@ -191,16 +191,17 @@
 }
 
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
-  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
+    // Insert after last instruction.
     MarkSafepointPC(last_lir_insn_);
   }
 }
 
 void Mir2Lir::MarkPossibleNullPointerExceptionAfter(int opt_flags, LIR* after) {
-  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -209,13 +210,13 @@
 }
 
 void Mir2Lir::MarkPossibleStackOverflowException() {
-  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
     MarkSafepointPC(last_lir_insn_);
   }
 }
 
 void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -368,7 +369,8 @@
       if (!use_direct_type_ptr) {
         mir_to_lir->LoadClassType(type_idx, kArg0);
         func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayResolved);
-        mir_to_lir->CallRuntimeHelperRegMethodRegLocation(func_offset, mir_to_lir->TargetReg(kArg0, false),
+        mir_to_lir->CallRuntimeHelperRegMethodRegLocation(func_offset,
+                                                          mir_to_lir->TargetReg(kArg0, kNotWide),
                                                           rl_src, true);
       } else {
         // Use the direct pointer.
@@ -431,8 +433,8 @@
   } else {
     GenFilledNewArrayCall<4>(this, cu_, elems, type_idx);
   }
-  FreeTemp(TargetReg(kArg2, false));
-  FreeTemp(TargetReg(kArg1, false));
+  FreeTemp(TargetReg(kArg2, kNotWide));
+  FreeTemp(TargetReg(kArg1, kNotWide));
   /*
    * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the
    * return region.  Because AllocFromCode placed the new array
@@ -440,7 +442,7 @@
    * added, it may be necessary to additionally copy all return
    * values to a home location in thread-local storage
    */
-  RegStorage ref_reg = TargetRefReg(kRet0);
+  RegStorage ref_reg = TargetReg(kRet0, kRef);
   LockTemp(ref_reg);
 
   // TODO: use the correct component size, currently all supported types
@@ -477,7 +479,7 @@
     switch (cu_->instruction_set) {
       case kThumb2:
       case kArm64:
-        r_val = TargetReg(kLr, false);
+        r_val = TargetReg(kLr, kNotWide);
         break;
       case kX86:
       case kX86_64:
@@ -553,7 +555,7 @@
                                  storage_index_, true);
     }
     // Copy helper's result into r_base, a no-op on all but MIPS.
-    m2l_->OpRegCopy(r_base_,  m2l_->TargetRefReg(kRet0));
+    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0, kRef));
 
     m2l_->OpUnconditionalBranch(cont_);
   }
@@ -580,8 +582,7 @@
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
   OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
-  if (!SLOW_FIELD_PATH && field_info.FastPut() &&
-      (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
+  if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -601,10 +602,10 @@
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetRefReg(kArg1);
+      RegStorage r_method = TargetReg(kArg1, kRef);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      r_base = TargetRefReg(kArg0);
+      r_base = TargetReg(kArg0, kRef);
       LockTemp(r_base);
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                   kNotVolatile);
@@ -618,19 +619,23 @@
         // The slow path is invoked if the r_base is NULL or the class pointed
         // to by it is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-        RegStorage r_tmp = TargetReg(kArg2, false);
+        RegStorage r_tmp = TargetReg(kArg2, kNotWide);
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
                                                      field_info.StorageIndex(), r_base));
 
         FreeTemp(r_tmp);
-        // Ensure load of status and load of value don't re-order.
-        GenMemBarrier(kLoadLoad);
+        // Ensure load of status and store of value don't re-order.
+        // TODO: Presumably the actual value store is control-dependent on the status load,
+        // and will thus not be reordered in any case, since stores are never speculated.
+        // Does later code "know" that the class is now initialized?  If so, we still
+        // need the barrier to guard later static loads.
+        GenMemBarrier(kLoadAny);
       }
       FreeTemp(r_method);
     }
@@ -677,8 +682,7 @@
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
   OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
-  if (!SLOW_FIELD_PATH && field_info.FastGet() &&
-      (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
+  if (!SLOW_FIELD_PATH && field_info.FastGet()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -694,10 +698,10 @@
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetRefReg(kArg1);
+      RegStorage r_method = TargetReg(kArg1, kRef);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      r_base = TargetRefReg(kArg0);
+      r_base = TargetReg(kArg0, kRef);
       LockTemp(r_base);
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                   kNotVolatile);
@@ -711,11 +715,11 @@
         // The slow path is invoked if the r_base is NULL or the class pointed
         // to by it is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-        RegStorage r_tmp = TargetReg(kArg2, false);
+        RegStorage r_tmp = TargetReg(kArg2, kNotWide);
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
@@ -723,7 +727,7 @@
 
         FreeTemp(r_tmp);
         // Ensure load of status and load of value don't re-order.
-        GenMemBarrier(kLoadLoad);
+        GenMemBarrier(kLoadAny);
       }
       FreeTemp(r_method);
     }
@@ -782,6 +786,8 @@
       is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet64Instance)
           : (is_object ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGetObjInstance)
               : QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet32Instance));
+  // Second argument of pGetXXInstance is always a reference.
+  DCHECK_EQ(static_cast<unsigned int>(rl_obj.wide), 0U);
   mir_to_lir->CallRuntimeHelperImmRegLocation(getter_offset, field_info->FieldIndex(), rl_obj,
                                               true);
 }
@@ -792,8 +798,7 @@
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
   OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
-  if (!SLOW_FIELD_PATH && field_info.FastGet() &&
-      (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
+  if (!SLOW_FIELD_PATH && field_info.FastGet()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kRefReg);
@@ -849,8 +854,7 @@
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
   OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
-  if (!SLOW_FIELD_PATH && field_info.FastPut() &&
-      (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
+  if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kRefReg);
@@ -957,7 +961,7 @@
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
                                                       rl_method_.reg, true);
           }
-          m2l_->OpRegCopy(rl_result_.reg,  m2l_->TargetRefReg(kRet0));
+          m2l_->OpRegCopy(rl_result_.reg,  m2l_->TargetReg(kRet0, kRef));
 
           m2l_->OpUnconditionalBranch(cont_);
         }
@@ -997,15 +1001,15 @@
       DCHECK(!IsTemp(rl_method.reg));
       r_method = rl_method.reg;
     } else {
-      r_method = TargetRefReg(kArg2);
+      r_method = TargetReg(kArg2, kRef);
       LoadCurrMethodDirect(r_method);
     }
     LoadRefDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
-                TargetRefReg(kArg0), kNotVolatile);
+                TargetReg(kArg0, kRef), kNotVolatile);
 
     // Might call out to helper, which will return resolved string in kRet0
-    LoadRefDisp(TargetRefReg(kArg0), offset_of_string, TargetRefReg(kRet0), kNotVolatile);
-    LIR* fromfast = OpCmpImmBranch(kCondEq, TargetRefReg(kRet0), 0, NULL);
+    LoadRefDisp(TargetReg(kArg0, kRef), offset_of_string, TargetReg(kRet0, kRef), kNotVolatile);
+    LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0, kRef), 0, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
     {
@@ -1074,10 +1078,12 @@
         mir_to_lir->LoadClassType(type_idx, kArg0);
         if (!is_type_initialized) {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectResolved);
-          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetRefReg(kArg0), true);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0, kRef),
+                                                 true);
         } else {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectInitialized);
-          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetRefReg(kArg0), true);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0, kRef),
+                                                 true);
         }
       } else {
         // Use the direct pointer.
@@ -1196,9 +1202,9 @@
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
-  RegStorage method_reg = TargetRefReg(kArg1);
+  RegStorage method_reg = TargetReg(kArg1, kRef);
   LoadCurrMethodDirect(method_reg);   // kArg1 <= current Method*
-  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*
+  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kArg0
@@ -1209,15 +1215,15 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path
-    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+    OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
   } else if (use_declaring_class) {
-    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
     LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
     LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg, kNotVolatile);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
@@ -1232,8 +1238,8 @@
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
       }
-      OpRegCopy(TargetRefReg(kArg2), TargetRefReg(kRet0));  // Align usage with fast path
-      LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  /* reload Ref */
+      OpRegCopy(TargetReg(kArg2, kRef), TargetReg(kRet0, kRef));  // Align usage with fast path
+      LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  /* reload Ref */
       // Rejoin code paths
       LIR* hop_target = NewLIR0(kPseudoTargetLabel);
       hop_branch->target = hop_target;
@@ -1245,25 +1251,25 @@
     // On MIPS rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
   }
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetRefReg(kArg0), 0, NULL);
+  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0, kRef), 0, NULL);
 
   /* load object->klass_ */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetRefReg(kArg1),
-              kNotVolatile);
+  LoadRefDisp(TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
+              TargetReg(kArg1, kRef), kNotVolatile);
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */
   LIR* branchover = NULL;
   if (type_known_final) {
     // rl_result == ref == null == 0.
     if (cu_->instruction_set == kThumb2) {
-      OpRegReg(kOpCmp, TargetRefReg(kArg1), TargetRefReg(kArg2));  // Same?
+      OpRegReg(kOpCmp, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef));  // Same?
       LIR* it = OpIT(kCondEq, "E");   // if-convert the test
       LoadConstant(rl_result.reg, 1);     // .eq case - load true
       LoadConstant(rl_result.reg, 0);     // .ne case - load false
       OpEndIT(it);
     } else {
       LoadConstant(rl_result.reg, 0);     // ne case - load false
-      branchover = OpCmpBranch(kCondNe, TargetRefReg(kArg1), TargetRefReg(kArg2), NULL);
+      branchover = OpCmpBranch(kCondNe, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef), NULL);
       LoadConstant(rl_result.reg, 1);     // eq case - load true
     }
   } else {
@@ -1274,11 +1280,11 @@
       LIR* it = nullptr;
       if (!type_known_abstract) {
       /* Uses conditional nullification */
-        OpRegReg(kOpCmp, TargetRefReg(kArg1), TargetRefReg(kArg2));  // Same?
+        OpRegReg(kOpCmp, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef));  // Same?
         it = OpIT(kCondEq, "EE");   // if-convert the test
-        LoadConstant(TargetReg(kArg0, false), 1);     // .eq case - load true
+        LoadConstant(TargetReg(kArg0, kNotWide), 1);     // .eq case - load true
       }
-      OpRegCopy(TargetRefReg(kArg0), TargetRefReg(kArg2));    // .ne case - arg0 <= class
+      OpRegCopy(TargetReg(kArg0, kRef), TargetReg(kArg2, kRef));    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       if (it != nullptr) {
         OpEndIT(it);
@@ -1288,12 +1294,12 @@
       if (!type_known_abstract) {
         /* Uses branchovers */
         LoadConstant(rl_result.reg, 1);     // assume true
-        branchover = OpCmpBranch(kCondEq, TargetRefReg(kArg1), TargetRefReg(kArg2), NULL);
+        branchover = OpCmpBranch(kCondEq, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef), NULL);
       }
       RegStorage r_tgt = cu_->target64 ?
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
-      OpRegCopy(TargetRefReg(kArg0), TargetRefReg(kArg2));    // .ne case - arg0 <= class
+      OpRegCopy(TargetReg(kArg0, kRef), TargetReg(kArg2, kRef));    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       FreeTemp(r_tgt);
     }
@@ -1347,9 +1353,9 @@
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
-  RegStorage method_reg = TargetRefReg(kArg1);
+  RegStorage method_reg = TargetReg(kArg1, kRef);
   LoadCurrMethodDirect(method_reg);  // kArg1 <= current Method*
-  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*
+  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kRet0
@@ -1361,7 +1367,7 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path
+    OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path
   } else if (use_declaring_class) {
     LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
@@ -1392,12 +1398,12 @@
           // InitializeTypeFromCode(idx, method)
           if (m2l_->cu_->target64) {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
-                                          m2l_->TargetRefReg(kArg1), true);
+                                          m2l_->TargetReg(kArg1, kRef), true);
           } else {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
-                                          m2l_->TargetRefReg(kArg1), true);
+                                          m2l_->TargetReg(kArg1, kRef), true);
           }
-          m2l_->OpRegCopy(class_reg_, m2l_->TargetRefReg(kRet0));  // Align usage with fast path
+          m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0, kRef));  // Align usage with fast path
           m2l_->OpUnconditionalBranch(cont_);
         }
 
@@ -1410,7 +1416,7 @@
     }
   }
   // At this point, class_reg (kArg2) has class
-  LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+  LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
 
   // Slow path for the case where the classes are not equal.  In this case we need
   // to call a helper function to do the check.
@@ -1424,15 +1430,17 @@
       GenerateTargetLabel();
 
       if (load_) {
-        m2l_->LoadRefDisp(m2l_->TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
-                          m2l_->TargetRefReg(kArg1), kNotVolatile);
+        m2l_->LoadRefDisp(m2l_->TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
+                          m2l_->TargetReg(kArg1, kRef), kNotVolatile);
       }
       if (m2l_->cu_->target64) {
-        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetRefReg(kArg2),
-                                      m2l_->TargetRefReg(kArg1), true);
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast),
+                                      m2l_->TargetReg(kArg2, kRef), m2l_->TargetReg(kArg1, kRef),
+                                      true);
       } else {
-        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetRefReg(kArg2),
-                                              m2l_->TargetRefReg(kArg1), true);
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast),
+                                      m2l_->TargetReg(kArg2, kRef), m2l_->TargetReg(kArg1, kRef),
+                                      true);
       }
 
       m2l_->OpUnconditionalBranch(cont_);
@@ -1444,7 +1452,7 @@
 
   if (type_known_abstract) {
     // Easier case, run slow path if target is non-null (slow path will load from target)
-    LIR* branch = OpCmpImmBranch(kCondNe, TargetRefReg(kArg0), 0, nullptr);
+    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0, kRef), 0, nullptr);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
   } else {
@@ -1453,13 +1461,13 @@
     // slow path if the classes are not equal.
 
     /* Null is OK - continue */
-    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetRefReg(kArg0), 0, nullptr);
+    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0, kRef), 0, nullptr);
     /* load object->klass_ */
     DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-    LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
-                TargetRefReg(kArg1), kNotVolatile);
+    LoadRefDisp(TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
+                TargetReg(kArg1, kRef), kNotVolatile);
 
-    LIR* branch2 = OpCmpBranch(kCondNe, TargetRefReg(kArg1), class_reg, nullptr);
+    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1, kRef), class_reg, nullptr);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
     // Add the slow path that will not perform load since this is already done.
@@ -1482,8 +1490,8 @@
      * lr is used explicitly elsewhere in the code generator and cannot
      * normally be used as a general temp register.
      */
-    MarkTemp(TargetReg(kLr));   // Add lr to the temp pool
-    FreeTemp(TargetReg(kLr));   // and make it available
+    MarkTemp(TargetReg(kLr, kNotWide));   // Add lr to the temp pool
+    FreeTemp(TargetReg(kLr, kNotWide));   // and make it available
   }
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
@@ -1510,8 +1518,8 @@
   FreeRegLocTemps(rl_result, rl_src2);
   StoreValueWide(rl_dest, rl_result);
   if (cu_->instruction_set == kThumb2) {
-    Clobber(TargetReg(kLr));
-    UnmarkTemp(TargetReg(kLr));  // Remove lr from the temp pool
+    Clobber(TargetReg(kLr, kNotWide));
+    UnmarkTemp(TargetReg(kLr, kNotWide));  // Remove lr from the temp pool
   }
 }
 
@@ -1675,13 +1683,13 @@
     // If we haven't already generated the code use the callout function.
     if (!done) {
       FlushAllRegs();   /* Send everything to home location */
-      LoadValueDirectFixed(rl_src2, TargetReg(kArg1, false));
+      LoadValueDirectFixed(rl_src2, TargetReg(kArg1, kNotWide));
       RegStorage r_tgt = cu_->target64 ?
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod)) :
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod));
-      LoadValueDirectFixed(rl_src1, TargetReg(kArg0, false));
+      LoadValueDirectFixed(rl_src1, TargetReg(kArg0, kNotWide));
       if (check_zero) {
-        GenDivZeroCheck(TargetReg(kArg1, false));
+        GenDivZeroCheck(TargetReg(kArg1, kNotWide));
       }
       // NOTE: callout here is not a safepoint.
       if (cu_->target64) {
@@ -1945,14 +1953,14 @@
 
       if (!done) {
         FlushAllRegs();   /* Everything to home location. */
-        LoadValueDirectFixed(rl_src, TargetReg(kArg0, false));
-        Clobber(TargetReg(kArg0, false));
+        LoadValueDirectFixed(rl_src, TargetReg(kArg0, kNotWide));
+        Clobber(TargetReg(kArg0, kNotWide));
         if (cu_->target64) {
-          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0, false), lit,
-                                  false);
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0, kNotWide),
+                                  lit, false);
         } else {
-          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), TargetReg(kArg0, false), lit,
-                                  false);
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), TargetReg(kArg0, kNotWide),
+                                  lit, false);
         }
         if (is_div)
           rl_result = GetReturn(kCoreReg);
@@ -1985,7 +1993,7 @@
   bool call_out = false;
   bool check_zero = false;
   ThreadOffset<pointer_size> func_offset(-1);
-  int ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
+  int ret_reg = mir_to_lir->TargetReg(kRet0, kNotWide).GetReg();
 
   switch (opcode) {
     case Instruction::NOT_LONG:
@@ -2033,7 +2041,7 @@
         return;
       } else {
         call_out = true;
-        ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
+        ret_reg = mir_to_lir->TargetReg(kRet0, kNotWide).GetReg();
         func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmul);
       }
       break;
@@ -2045,7 +2053,7 @@
       }
       call_out = true;
       check_zero = true;
-      ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
+      ret_reg = mir_to_lir->TargetReg(kRet0, kNotWide).GetReg();
       func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLdiv);
       break;
     case Instruction::REM_LONG:
@@ -2058,8 +2066,8 @@
       check_zero = true;
       func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmod);
       /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
-      ret_reg = (cu->instruction_set == kThumb2) ? mir_to_lir->TargetReg(kArg2, false).GetReg() :
-          mir_to_lir->TargetReg(kRet0, false).GetReg();
+      ret_reg = (cu->instruction_set == kThumb2) ? mir_to_lir->TargetReg(kArg2, kNotWide).GetReg() :
+          mir_to_lir->TargetReg(kRet0, kNotWide).GetReg();
       break;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
@@ -2102,11 +2110,11 @@
   } else {
     mir_to_lir->FlushAllRegs();   /* Send everything to home location */
     if (check_zero) {
-      RegStorage r_tmp1 = mir_to_lir->TargetReg(kArg0, kArg1);
-      RegStorage r_tmp2 = mir_to_lir->TargetReg(kArg2, kArg3);
+      RegStorage r_tmp1 = mir_to_lir->TargetReg(kArg0, kWide);
+      RegStorage r_tmp2 = mir_to_lir->TargetReg(kArg2, kWide);
       mir_to_lir->LoadValueDirectWideFixed(rl_src2, r_tmp2);
       RegStorage r_tgt = mir_to_lir->CallHelperSetup(func_offset);
-      mir_to_lir->GenDivZeroCheckWide(mir_to_lir->TargetReg(kArg2, kArg3));
+      mir_to_lir->GenDivZeroCheckWide(r_tmp2);
       mir_to_lir->LoadValueDirectWideFixed(rl_src1, r_tmp1);
       // NOTE: callout here is not a safepoint
       mir_to_lir->CallHelper(r_tgt, func_offset, false /* not safepoint */);
@@ -2114,7 +2122,7 @@
       mir_to_lir->CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
     }
     // Adjust return regs in to handle case of rem returning kArg2/kArg3
-    if (ret_reg == mir_to_lir->TargetReg(kRet0, false).GetReg())
+    if (ret_reg == mir_to_lir->TargetReg(kRet0, kNotWide).GetReg())
       rl_result = mir_to_lir->GetReturnWide(kCoreReg);
     else
       rl_result = mir_to_lir->GetReturnWideAlt();
@@ -2189,7 +2197,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       return;
     }
@@ -2209,7 +2217,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
-  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       OpUnconditionalBranch(target);
       return;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6c0dfe8..8ce6e1a 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -22,9 +22,13 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
 #include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
+#include "mirror/reference-inl.h"
 #include "mirror/string.h"
 #include "mir_to_lir-inl.h"
+#include "scoped_thread_state_change.h"
 #include "x86/codegen_x86.h"
 
 namespace art {
@@ -130,9 +134,10 @@
 INSTANTIATE(void Mir2Lir::CallRuntimeHelper, bool safepoint_pc)
 
 template <size_t pointer_size>
-void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<pointer_size> helper_offset, int arg0, bool safepoint_pc) {
+void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<pointer_size> helper_offset, int arg0,
+                                   bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -142,7 +147,7 @@
 void Mir2Lir::CallRuntimeHelperReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                    bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+  OpRegCopy(TargetReg(kArg0, arg0.GetWideKind()), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -155,13 +160,7 @@
   if (arg0.wide == 0) {
     LoadValueDirectFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, arg0));
   } else {
-    RegStorage r_tmp;
-    if (cu_->target64) {
-      r_tmp = TargetReg(kArg0, true);
-    } else {
-      r_tmp = TargetReg(arg0.fp ? kFArg0 : kArg0, arg0.fp ? kFArg1 : kArg1);
-    }
-    LoadValueDirectWideFixed(arg0, r_tmp);
+    LoadValueDirectWideFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kWide));
   }
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
@@ -172,8 +171,8 @@
 void Mir2Lir::CallRuntimeHelperImmImm(ThreadOffset<pointer_size> helper_offset, int arg0, int arg1,
                                       bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadConstant(TargetReg(kArg0, false), arg0);
-  LoadConstant(TargetReg(kArg1, false), arg1);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
+  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -183,23 +182,14 @@
 void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_offset, int arg0,
                                               RegLocation arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
+  DCHECK(!arg1.fp);
   if (arg1.wide == 0) {
     LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
   } else {
-    RegStorage r_tmp;
-    if (cu_->target64) {
-      r_tmp = TargetReg(kArg1, true);
-    } else {
-      if (cu_->instruction_set == kMips) {
-        // skip kArg1 for stack alignment.
-        r_tmp = TargetReg(kArg2, kArg3);
-      } else {
-        r_tmp = TargetReg(kArg1, kArg2);
-      }
-    }
+    RegStorage r_tmp = TargetReg(cu_->instruction_set == kMips ? kArg2 : kArg1, kWide);
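+    // On Mips, kArg1 is skipped for alignment, so the wide value goes to kArg2/kArg3.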
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -212,7 +202,7 @@
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   DCHECK(!arg0.wide);
   LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
-  LoadConstant(TargetReg(kArg1, false), arg1);
+  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -223,8 +213,8 @@
 void Mir2Lir::CallRuntimeHelperImmReg(ThreadOffset<pointer_size> helper_offset, int arg0,
                                       RegStorage arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  OpRegCopy(TargetReg(kArg1, arg1.GetWideKind()), arg1);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -234,8 +224,8 @@
 void Mir2Lir::CallRuntimeHelperRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                       int arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
-  LoadConstant(TargetReg(kArg1, false), arg1);
+  OpRegCopy(TargetReg(kArg0, arg0.GetWideKind()), arg0);
+  LoadConstant(TargetReg(kArg1, kNotWide), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -245,8 +235,8 @@
 void Mir2Lir::CallRuntimeHelperImmMethod(ThreadOffset<pointer_size> helper_offset, int arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadCurrMethodDirect(TargetRefReg(kArg1));
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -256,11 +246,12 @@
 void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.Is64Bit()), arg0));
-  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
-    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.GetWideKind()), arg0));
+  RegStorage r_tmp = TargetReg(kArg0, arg0.GetWideKind());
+  if (r_tmp.NotExactlyEquals(arg0)) {
+    OpRegCopy(r_tmp, arg0);
   }
-  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -271,11 +262,12 @@
                                                     RegStorage arg0, RegLocation arg2,
                                                     bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.Is64Bit()), arg0));
-  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
-    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.GetWideKind()), arg0));
+  RegStorage r_tmp = TargetReg(kArg0, arg0.GetWideKind());
+  if (r_tmp.NotExactlyEquals(arg0)) {
+    OpRegCopy(r_tmp, arg0);
   }
-  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
   LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
@@ -312,47 +304,26 @@
   } else {
     DCHECK(!cu_->target64);
     if (arg0.wide == 0) {
-      LoadValueDirectFixed(arg0, arg0.fp ? TargetReg(kFArg0, false) : TargetReg(kArg0, false));
+      LoadValueDirectFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kNotWide));
       if (arg1.wide == 0) {
         if (cu_->instruction_set == kMips) {
-          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2, false) : TargetReg(kArg1, false));
+          LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg1, kNotWide));
         } else {
-          LoadValueDirectFixed(arg1, TargetReg(kArg1, false));
+          LoadValueDirectFixed(arg1, TargetReg(kArg1, kNotWide));
         }
       } else {
         if (cu_->instruction_set == kMips) {
-          RegStorage r_tmp;
-          if (arg1.fp) {
-            r_tmp = TargetReg(kFArg2, kFArg3);
-          } else {
-            // skip kArg1 for stack alignment.
-            r_tmp = TargetReg(kArg2, kArg3);
-          }
-          LoadValueDirectWideFixed(arg1, r_tmp);
+          LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kWide));
         } else {
-          RegStorage r_tmp;
-          r_tmp = TargetReg(kArg1, kArg2);
-          LoadValueDirectWideFixed(arg1, r_tmp);
+          LoadValueDirectWideFixed(arg1, TargetReg(kArg1, kWide));
         }
       }
     } else {
-      RegStorage r_tmp;
-      if (arg0.fp) {
-        r_tmp = TargetReg(kFArg0, kFArg1);
-      } else {
-        r_tmp = TargetReg(kArg0, kArg1);
-      }
-      LoadValueDirectWideFixed(arg0, r_tmp);
+      LoadValueDirectWideFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, kWide));
       if (arg1.wide == 0) {
-        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2, false) : TargetReg(kArg2, false));
+        LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kNotWide));
       } else {
-        RegStorage r_tmp;
-        if (arg1.fp) {
-          r_tmp = TargetReg(kFArg2, kFArg3);
-        } else {
-          r_tmp = TargetReg(kArg2, kArg3);
-        }
-        LoadValueDirectWideFixed(arg1, r_tmp);
+        LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kWide));
       }
     }
   }
@@ -363,19 +334,21 @@
             RegLocation arg1, bool safepoint_pc)
 
 void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
-  if (IsSameReg(arg1, TargetReg(kArg0, arg1.Is64Bit()))) {
-    if (IsSameReg(arg0, TargetReg(kArg1, arg0.Is64Bit()))) {
+  WideKind arg0_kind = arg0.GetWideKind();
+  WideKind arg1_kind = arg1.GetWideKind();
+  if (IsSameReg(arg1, TargetReg(kArg0, arg1_kind))) {
+    if (IsSameReg(arg0, TargetReg(kArg1, arg0_kind))) {
       // Swap kArg0 and kArg1 with kArg2 as temp.
-      OpRegCopy(TargetReg(kArg2, arg1.Is64Bit()), arg1);
-      OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
-      OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), TargetReg(kArg2, arg1.Is64Bit()));
+      OpRegCopy(TargetReg(kArg2, arg1_kind), arg1);
+      OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
+      OpRegCopy(TargetReg(kArg1, arg1_kind), TargetReg(kArg2, arg1_kind));
     } else {
-      OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
-      OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+      OpRegCopy(TargetReg(kArg1, arg1_kind), arg1);
+      OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
     }
   } else {
-    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
-    OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
+    OpRegCopy(TargetReg(kArg0, arg0_kind), arg0);
+    OpRegCopy(TargetReg(kArg1, arg1_kind), arg1);
   }
 }
 
@@ -395,7 +368,7 @@
                                          RegStorage arg1, int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   CopyToArgumentRegs(arg0, arg1);
-  LoadConstant(TargetReg(kArg2, false), arg2);
+  LoadConstant(TargetReg(kArg2, kNotWide), arg2);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -407,8 +380,8 @@
                                                     int arg0, RegLocation arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
-  LoadCurrMethodDirect(TargetRefReg(kArg1));
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -419,9 +392,9 @@
 void Mir2Lir::CallRuntimeHelperImmMethodImm(ThreadOffset<pointer_size> helper_offset, int arg0,
                                             int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadCurrMethodDirect(TargetRefReg(kArg1));
-  LoadConstant(TargetReg(kArg2, false), arg2);
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadCurrMethodDirect(TargetReg(kArg1, kRef));
+  LoadConstant(TargetReg(kArg2, kNotWide), arg2);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -438,15 +411,9 @@
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   } else {
-    RegStorage r_tmp;
-    if (cu_->target64) {
-      r_tmp = TargetReg(kArg2, true);
-    } else {
-      r_tmp = TargetReg(kArg2, kArg3);
-    }
-    LoadValueDirectWideFixed(arg2, r_tmp);
+    LoadValueDirectWideFixed(arg2, TargetReg(kArg2, kWide));
   }
-  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadConstant(TargetReg(kArg0, kNotWide), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -454,10 +421,12 @@
             RegLocation arg2, bool safepoint_pc)
 
 template <size_t pointer_size>
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
-                                                                 RegLocation arg0, RegLocation arg1,
-                                                                 RegLocation arg2,
-                                                                 bool safepoint_pc) {
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(
+    ThreadOffset<pointer_size> helper_offset,
+    RegLocation arg0,
+    RegLocation arg1,
+    RegLocation arg2,
+    bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
   LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
@@ -484,7 +453,7 @@
    */
   RegLocation rl_src = rl_method;
   rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetRefReg(kArg0);
+  rl_src.reg = TargetReg(kArg0, kRef);
   rl_src.home = false;
   MarkLive(rl_src);
   StoreValue(rl_method, rl_src);
@@ -558,15 +527,44 @@
     } else {
       // If arriving in frame & promoted
       if (v_map->core_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
+                   RegStorage::Solo32(v_map->core_reg));
       }
       if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
+        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
+                   RegStorage::Solo32(v_map->fp_reg));
       }
     }
   }
 }
 
+static void CommonCallCodeLoadThisIntoArg1(const CallInfo* info, Mir2Lir* cg) {
+  RegLocation rl_arg = info->args[0];
+  cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1, kRef));
+}
+
+static void CommonCallCodeLoadClassIntoArg0(const CallInfo* info, Mir2Lir* cg) {
+  cg->GenNullCheck(cg->TargetReg(kArg1, kRef), info->opt_flags);
+  // Get this->klass_ [use kArg1, set kArg0]
+  cg->LoadRefDisp(cg->TargetReg(kArg1, kRef), mirror::Object::ClassOffset().Int32Value(),
+                  cg->TargetReg(kArg0, kRef),
+                  kNotVolatile);
+  cg->MarkPossibleNullPointerException(info->opt_flags);
+}
+
+static bool CommonCallCodeLoadCodePointerIntoInvokeTgt(const CallInfo* info,
+                                                       const RegStorage* alt_from,
+                                                       const CompilationUnit* cu, Mir2Lir* cg) {
+  if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
+    // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
+    cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from,
+                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+                     cg->TargetPtrReg(kInvokeTgt));
+    return true;
+  }
+  return false;
+}
+
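// Editorial sketch (not part of this patch): how these NextCallInsn-style
// state machines are driven. The caller invokes the generator with an
// increasing state, interleaved with argument setup, until the generator
// signals completion by returning -1. The signature is simplified here; the
// real hook also receives the CompilationUnit, CallInfo, MethodReference,
// direct code/method pointers, and the InvokeType.
using NextInsnSketch = int (*)(int state);

static void DriveCallSequenceSketch(NextInsnSketch next, int call_state) {
  while (call_state >= 0) {
    call_state = next(call_state);  // Emit one step; -1 ends the sequence.
  }
}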
 /*
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
@@ -588,7 +586,7 @@
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
       if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetRefReg(kArg0), direct_method);
+        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
       }
@@ -597,7 +595,7 @@
       return -1;
     }
   } else {
-    RegStorage arg0_ref = cg->TargetRefReg(kArg0);
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       // TUNING: we can save a reg copy if Method* has been promoted.
@@ -626,12 +624,11 @@
                       kNotVolatile);
       break;
     case 3:  // Grab the code from the method*
-      if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-        if (direct_code == 0) {
-          cg->LoadWordDisp(arg0_ref,
-                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                           cg->TargetPtrReg(kInvokeTgt));
+      if (direct_code == 0) {
+        if (CommonCallCodeLoadCodePointerIntoInvokeTgt(info, &arg0_ref, cu, cg)) {
+          break;                                    // kInvokeTgt := arg0_ref->entrypoint
         }
+      } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
         break;
       }
       // Intentional fallthrough for x86
@@ -659,36 +656,24 @@
    * fully resolved at compile time.
    */
   switch (state) {
-    case 0: {  // Get "this" [set kArg1]
-      RegLocation  rl_arg = info->args[0];
-      cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1));
+    case 0:
+      CommonCallCodeLoadThisIntoArg1(info, cg);   // kArg1 := this
+      break;
+    case 1:
+      CommonCallCodeLoadClassIntoArg0(info, cg);  // kArg0 := kArg1->class
+                                                  // Includes a null-check.
+      break;
+    case 2: {
+      // Get this->klass_.embedded_vtable[method_idx] [use kArg0, set kArg0]
+      int32_t offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
+          method_idx * sizeof(mirror::Class::VTableEntry);
+      // Load target method from embedded vtable to kArg0 [use kArg0, set kArg0]
+      cg->LoadRefDisp(cg->TargetReg(kArg0, kRef), offset, cg->TargetReg(kArg0, kRef), kNotVolatile);
       break;
     }
-    case 1:  // Is "this" null? [use kArg1]
-      cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
-      // get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
-                      kNotVolatile);
-      cg->MarkPossibleNullPointerException(info->opt_flags);
-      break;
-    case 2:  // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
-                      kNotVolatile);
-      break;
-    case 3:  // Get target method [use kInvokeTgt, set kArg0]
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
-                      ObjArray::OffsetOfElement(method_idx).Int32Value(),
-                      cg->TargetRefReg(kArg0),
-                      kNotVolatile);
-      break;
-    case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
-      if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-        cg->LoadWordDisp(cg->TargetRefReg(kArg0),
-                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetPtrReg(kInvokeTgt));
-        break;
+    case 3:
+      if (CommonCallCodeLoadCodePointerIntoInvokeTgt(info, nullptr, cu, cg)) {
+        break;                                    // kInvokeTgt := kArg0->entrypoint
       }
       // Intentional fallthrough for X86
     default:
@@ -712,43 +697,28 @@
   switch (state) {
     case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
       CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-      cg->LoadConstant(cg->TargetReg(kHiddenArg, false), target_method.dex_method_index);
+      cg->LoadConstant(cg->TargetReg(kHiddenArg, kNotWide), target_method.dex_method_index);
       if (cu->instruction_set == kX86) {
-        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg, false), cg->TargetReg(kHiddenArg, false));
+        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg, kNotWide), cg->TargetReg(kHiddenArg, kNotWide));
       }
       break;
-    case 1: {  // Get "this" [set kArg1]
-      RegLocation  rl_arg = info->args[0];
-      cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1));
+    case 1:
+      CommonCallCodeLoadThisIntoArg1(info, cg);   // kArg1 := this
+      break;
+    case 2:
+      CommonCallCodeLoadClassIntoArg0(info, cg);  // kArg0 := kArg1->class
+                                                  // Includes a null-check.
+      break;
+    case 3: {  // Get target method [use kArg0, set kArg0]
+      int32_t offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
+          (method_idx % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+      // Load target method from embedded imtable to kArg0 [use kArg0, set kArg0]
+      cg->LoadRefDisp(cg->TargetReg(kArg0, kRef), offset, cg->TargetReg(kArg0, kRef), kNotVolatile);
       break;
     }
-    case 2:  // Is "this" null? [use kArg1]
-      cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
-      // Get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
-                      kNotVolatile);
-      cg->MarkPossibleNullPointerException(info->opt_flags);
-      break;
-    case 3:  // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt]
-      // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
-                      kNotVolatile);
-      break;
-    case 4:  // Get target method [use kInvokeTgt, set kArg0]
-      // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
-                       ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(),
-                       cg->TargetRefReg(kArg0),
-                       kNotVolatile);
-      break;
-    case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
-      if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-        cg->LoadWordDisp(cg->TargetRefReg(kArg0),
-                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetPtrReg(kInvokeTgt));
-        break;
+    case 4:
+      if (CommonCallCodeLoadCodePointerIntoInvokeTgt(info, nullptr, cu, cg)) {
+        break;                                    // kInvokeTgt := kArg0->entrypoint
       }
       // Intentional fallthrough for X86
     default:
@@ -758,9 +728,9 @@
 }
 
 template <size_t pointer_size>
-static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info, ThreadOffset<pointer_size> trampoline,
-                            int state, const MethodReference& target_method,
-                            uint32_t method_idx) {
+static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info,
+                            ThreadOffset<pointer_size> trampoline, int state,
+                            const MethodReference& target_method, uint32_t method_idx) {
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
   /*
    * This handles the case in which the base method is not fully
@@ -769,11 +739,12 @@
   if (state == 0) {
     if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
       // Load trampoline target
-      cg->LoadWordDisp(cg->TargetPtrReg(kSelf), trampoline.Int32Value(), cg->TargetPtrReg(kInvokeTgt));
+      cg->LoadWordDisp(cg->TargetPtrReg(kSelf), trampoline.Int32Value(),
+                       cg->TargetPtrReg(kInvokeTgt));
     }
     // Load kArg0 with method index
     CHECK_EQ(cu->dex_file, target_method.dex_file);
-    cg->LoadConstant(cg->TargetReg(kArg0, false), target_method.dex_method_index);
+    cg->LoadConstant(cg->TargetReg(kArg0, kNotWide), target_method.dex_method_index);
     return 1;
   }
   return -1;
@@ -824,10 +795,12 @@
                            uint32_t unused, uintptr_t unused2,
                            uintptr_t unused3, InvokeType unused4) {
   if (cu->target64) {
-    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeVirtualTrampolineWithAccessCheck);
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8,
+        pInvokeVirtualTrampolineWithAccessCheck);
     return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
   } else {
-    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeVirtualTrampolineWithAccessCheck);
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4,
+        pInvokeVirtualTrampolineWithAccessCheck);
     return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
   }
 }
@@ -838,10 +811,12 @@
                                                 uint32_t unused, uintptr_t unused2,
                                                 uintptr_t unused3, InvokeType unused4) {
   if (cu->target64) {
-      ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeInterfaceTrampolineWithAccessCheck);
+      ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8,
+          pInvokeInterfaceTrampolineWithAccessCheck);
       return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
     } else {
-      ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeInterfaceTrampolineWithAccessCheck);
+      ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4,
+          pInvokeInterfaceTrampolineWithAccessCheck);
       return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
     }
 }
@@ -852,7 +827,8 @@
                          uint32_t vtable_idx, uintptr_t direct_code,
                          uintptr_t direct_method, InvokeType type, bool skip_this) {
   int last_arg_reg = 3 - 1;
-  int arg_regs[3] = {TargetReg(kArg1, false).GetReg(), TargetReg(kArg2, false).GetReg(), TargetReg(kArg3, false).GetReg()};
+  int arg_regs[3] = {TargetReg(kArg1, kNotWide).GetReg(), TargetReg(kArg2, kNotWide).GetReg(),
+                     TargetReg(kArg3, kNotWide).GetReg()};
 
   int next_reg = 0;
   int next_arg = 0;
@@ -927,7 +903,7 @@
         }
       } else {
         // kArg2 & rArg3 can safely be used here
-        reg = TargetReg(kArg3, false);
+        reg = TargetReg(kArg3, kNotWide);
         {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
@@ -951,7 +927,7 @@
       if (rl_arg.location == kLocPhysReg) {
         arg_reg = rl_arg.reg;
       } else {
-        arg_reg = rl_arg.wide ? TargetReg(kArg2, kArg3) : TargetReg(kArg2, false);
+        arg_reg = TargetReg(kArg2, rl_arg.wide ? kWide : kNotWide);
         if (rl_arg.wide) {
           LoadValueDirectWideFixed(rl_arg, arg_reg);
         } else {
@@ -981,21 +957,35 @@
                            type, skip_this);
 
   if (pcrLabel) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     }
   }
   return call_state;
 }
 
+// Default implementation of the implicit null pointer check.
+// Overridden by arch-specific backends as necessary.
+void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  RegStorage tmp = AllocTemp();
+  Load32Disp(reg, 0, tmp);
+  MarkPossibleNullPointerException(opt_flags);
+  FreeTemp(tmp);
+}
+
 /*
  * May have 0+ arguments (also used for jumbo).  Note that
  * source virtual registers may be in physical registers, so may
@@ -1062,23 +1052,23 @@
     // Use vldm/vstm pair using kArg3 as a temp
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetRefReg(kArg3), TargetPtrReg(kSp), start_offset);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), start_offset);
     LIR* ld = nullptr;
     {
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      ld = OpVldm(TargetRefReg(kArg3), regs_left_to_pass_via_stack);
+      ld = OpVldm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
     }
     // TUNING: loosen barrier
     ld->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetRefReg(kArg3), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4));
+    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4));
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
     LIR* st = nullptr;
     {
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      st = OpVstm(TargetRefReg(kArg3), regs_left_to_pass_via_stack);
+      st = OpVstm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
     }
     st->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
@@ -1152,7 +1142,8 @@
           if (ld2 != nullptr) {
             // For 64-bit load we can actually set up the aliasing information.
             AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
+                                    true);
           } else {
             // Set barrier for 128-bit load.
             ld1->u.m.def_mask = &kEncodeAll;
@@ -1162,7 +1153,8 @@
           if (st2 != nullptr) {
             // For 64-bit store we can actually set up the aliasing information.
             AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
+                                    true);
           } else {
             // Set barrier for 128-bit store.
             st1->u.m.def_mask = &kEncodeAll;
@@ -1177,7 +1169,7 @@
 
         // Instead of allocating a new temp, simply reuse one of the registers being used
         // for argument passing.
-        RegStorage temp = TargetReg(kArg3, false);
+        RegStorage temp = TargetReg(kArg3, kNotWide);
 
         // Now load the argument VR and store to the outs.
         Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
@@ -1190,14 +1182,14 @@
     }
   } else {
     // Generate memcpy
-    OpRegRegImm(kOpAdd, TargetRefReg(kArg0), TargetPtrReg(kSp), outs_offset);
-    OpRegRegImm(kOpAdd, TargetRefReg(kArg1), TargetPtrReg(kSp), start_offset);
+    OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset);
+    OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset);
     if (cu_->target64) {
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetRefReg(kArg0),
-                                 TargetRefReg(kArg1), (info->num_arg_words - 3) * 4, false);
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetReg(kArg0, kRef),
+                                 TargetReg(kArg1, kRef), (info->num_arg_words - 3) * 4, false);
     } else {
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetRefReg(kArg0),
-                                 TargetRefReg(kArg1), (info->num_arg_words - 3) * 4, false);
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetReg(kArg0, kRef),
+                                 TargetReg(kArg1, kRef), (info->num_arg_words - 3) * 4, false);
     }
   }
 
@@ -1208,16 +1200,17 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     }
   }
   return call_state;
@@ -1243,6 +1236,88 @@
   return res;
 }
 
+bool Mir2Lir::GenInlinedGet(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+
+  // The Reference class is stored in the image dex file, which might not be the same as the
+  // cu's dex file. Query the Reference class from the image dex file, then restore the original
+  // dex file after loading the class type.
+  uint16_t type_idx = 0;
+  const DexFile* ref_dex_file = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    type_idx = mirror::Reference::GetJavaLangRefReference()->GetDexTypeIndex();
+    ref_dex_file = mirror::Reference::GetJavaLangRefReference()->GetDexCache()->GetDexFile();
+  }
+  CHECK(LIKELY(ref_dex_file != nullptr));
+
+  // The address is either static within the image file, or needs to be patched up after
+  // compilation.
+  bool unused_type_initialized;
+  bool use_direct_type_ptr;
+  uintptr_t direct_type_ptr;
+  bool is_finalizable;
+  const DexFile* old_dex = cu_->dex_file;
+  cu_->dex_file = ref_dex_file;
+  RegStorage reg_class = TargetPtrReg(kArg1);
+  if (!cu_->compiler_driver->CanEmbedTypeInCode(*ref_dex_file, type_idx, &unused_type_initialized,
+                                                &use_direct_type_ptr, &direct_type_ptr,
+                                                &is_finalizable) || is_finalizable) {
+    cu_->dex_file = old_dex;
+    // The address is unknown and cannot be patched post-compile; do not inline the intrinsic.
+    return false;
+  }
+  if (use_direct_type_ptr) {
+    LoadConstant(reg_class, direct_type_ptr);
+  } else {
+    LoadClassType(type_idx, kArg1);
+  }
+  cu_->dex_file = old_dex;
+
+  // Get the offsets of the slow-path and disable-intrinsic flags in the Reference class.
+  uint32_t slow_path_flag_offset = 0;
+  uint32_t disable_flag_offset = 0;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference();
+    slow_path_flag_offset = reference_class->GetSlowPathFlagOffset().Uint32Value();
+    disable_flag_offset = reference_class->GetDisableIntrinsicFlagOffset().Uint32Value();
+  }
+  CHECK(slow_path_flag_offset && disable_flag_offset &&
+        (slow_path_flag_offset != disable_flag_offset));
+
+  // Intrinsic logic start.
+  RegLocation rl_obj = info->args[0];
+  rl_obj = LoadValue(rl_obj);
+
+  RegStorage reg_slow_path = AllocTemp();
+  RegStorage reg_disabled = AllocTemp();
+  Load32Disp(reg_class, slow_path_flag_offset, reg_slow_path);
+  Load32Disp(reg_class, disable_flag_offset, reg_disabled);
+  OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
+  FreeTemp(reg_disabled);
+
+  // If the slow path is enabled, jump to the JNI path target.
+  LIR* slow_path_branch = OpCmpImmBranch(kCondNe, reg_slow_path, 0, nullptr);
+  FreeTemp(reg_slow_path);
+
+  // Slow path not enabled; simply load the referent of the reference object.
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  GenNullCheck(rl_obj.reg, info->opt_flags);
+  LoadRefDisp(rl_obj.reg, mirror::Reference::ReferentOffset().Int32Value(), rl_result.reg,
+      kNotVolatile);
+  MarkPossibleNullPointerException(info->opt_flags);
+  StoreValue(rl_dest, rl_result);
+
+  LIR* intrinsic_finish = NewLIR0(kPseudoTargetLabel);
+  AddIntrinsicSlowPath(info, slow_path_branch, intrinsic_finish);
+
+  return true;
+}
+
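// Editorial sketch (not part of this patch): the guard GenInlinedGet emits,
// distilled. The two flags are loaded from the Reference class and OR-ed;
// a non-zero result branches to the out-of-line JNI slow path, otherwise the
// referent field is loaded inline.
static bool ReferenceGetFastPathSketch(int32_t slow_path_flag, int32_t disable_flag) {
  return (slow_path_flag | disable_flag) == 0;  // True: inline fast path is safe.
}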
 bool Mir2Lir::GenInlinedCharAt(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
@@ -1293,11 +1368,14 @@
       // On x86, we can compare to memory directly
       // Set up a launch pad to allow retry in case of bounds violation */
       if (rl_idx.is_const) {
+        LIR* comparison;
         range_check_branch = OpCmpMemImmBranch(
             kCondUlt, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
-            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr);
-      } else {
+            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr, &comparison);
+        MarkPossibleNullPointerExceptionAfter(0, comparison);
+      } else {
         OpRegMem(kOpCmp, rl_idx.reg, rl_obj.reg, count_offset);
+        MarkPossibleNullPointerException(0);
         range_check_branch = OpCondBranch(kCondUge, nullptr);
       }
     }
@@ -1367,8 +1445,8 @@
 }
 
 bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
-  if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
-    // TODO - add Mips implementation; Enable Arm64.
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation.
     return false;
   }
   RegLocation rl_src_i = info->args[0];
@@ -1554,9 +1632,9 @@
 
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
-  RegStorage reg_ptr = TargetRefReg(kArg0);
-  RegStorage reg_char = TargetReg(kArg1, false);
-  RegStorage reg_start = TargetReg(kArg2, false);
+  RegStorage reg_ptr = TargetReg(kArg0, kRef);
+  RegStorage reg_char = TargetReg(kArg1, kNotWide);
+  RegStorage reg_start = TargetReg(kArg2, kNotWide);
 
   LoadValueDirectFixed(rl_obj, reg_ptr);
   LoadValueDirectFixed(rl_char, reg_char);
@@ -1598,8 +1676,8 @@
   }
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
-  RegStorage reg_this = TargetRefReg(kArg0);
-  RegStorage reg_cmp = TargetRefReg(kArg1);
+  RegStorage reg_this = TargetReg(kArg0, kRef);
+  RegStorage reg_cmp = TargetReg(kArg1, kRef);
 
   RegLocation rl_this = info->args[0];
   RegLocation rl_cmp = info->args[1];
@@ -1711,10 +1789,7 @@
   }
 
   if (is_volatile) {
-    // Without context sensitive analysis, we must issue the most conservative barriers.
-    // In this case, either a load or store may follow so we issue both barriers.
-    GenMemBarrier(kLoadLoad);
-    GenMemBarrier(kLoadStore);
+    GenMemBarrier(kLoadAny);
   }
 
   if (is_long) {
@@ -1737,8 +1812,7 @@
   rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
   RegLocation rl_src_value = info->args[4];  // value to store
   if (is_volatile || is_ordered) {
-    // There might have been a store before this volatile one so insert StoreStore barrier.
-    GenMemBarrier(kStoreStore);
+    GenMemBarrier(kAnyStore);
   }
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
@@ -1767,8 +1841,9 @@
   FreeTemp(rl_offset.reg);
 
   if (is_volatile) {
-    // A load might follow the volatile store so insert a StoreLoad barrier.
-    GenMemBarrier(kStoreLoad);
+    // Prevent reordering with a subsequent volatile load.
+    // May also be needed to address store atomicity issues.
+    GenMemBarrier(kAnyAny);
   }
   if (is_object) {
     MarkGCCard(rl_value.reg, rl_object.reg);
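// Editorial sketch (not part of this patch): a C++11 analogue of the new
// barrier kinds, for intuition only. kLoadAny approximates an acquire fence,
// kAnyStore a release fence, and kAnyAny a full (seq_cst) fence.
#include <atomic>

static std::atomic<int> guard_sketch{0};
static int payload_sketch = 0;

void VolatilePutSketch() {
  payload_sketch = 42;
  std::atomic_thread_fence(std::memory_order_release);  // ~ kAnyStore before the store
  guard_sketch.store(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ kAnyAny after the store
}

int VolatileGetSketch() {
  while (guard_sketch.load(std::memory_order_relaxed) == 0) {
  }
  std::atomic_thread_fence(std::memory_order_acquire);  // ~ kLoadAny after the load
  return payload_sketch;  // Guaranteed to observe 42.
}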
@@ -1884,7 +1959,7 @@
         call_inst =
           reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(target_method, info->type);
       } else {
-        call_inst = OpMem(kOpBlx, TargetRefReg(kArg0),
+        call_inst = OpMem(kOpBlx, TargetReg(kArg0, kRef),
                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
       }
     } else {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index b97ff2a..2893157 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -121,20 +121,22 @@
     }
 
     ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
-    ResourceMask stop_use_reg_mask;
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+    /*
+     * Add pc to the resource mask to prevent this instruction
+     * from sinking past branch instructions. Also take out the memory
+     * region bits since stop_mask is used to check data/control
+     * dependencies.
+     *
+     * Note: on x86(-64) and Arm64 we use the IsBranch bit, as the PC is not exposed.
+     */
+    ResourceMask pc_encoding = GetPCUseDefEncoding();
+    if (pc_encoding == kEncodeNone) {
       // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask.
-      stop_use_reg_mask = ResourceMask::Bit(kIsBranch).Union(*this_lir->u.m.use_mask).Without(
-          kEncodeMem);
-    } else {
-      /*
-       * Add pc to the resource mask to prevent this instruction
-       * from sinking past branch instructions. Also take out the memory
-       * region bits since stop_mask is used to check data/control
-       * dependencies.
-       */
-      stop_use_reg_mask = GetPCUseDefEncoding().Union(*this_lir->u.m.use_mask).Without(kEncodeMem);
+      pc_encoding = ResourceMask::Bit(kIsBranch);
     }
+    ResourceMask stop_use_reg_mask =
+        pc_encoding.Union(*this_lir->u.m.use_mask).Without(kEncodeMem);
 
     for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
       /*
@@ -310,16 +312,17 @@
 
     ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask;
 
-    if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
-      /*
-       * Branches for null/range checks are marked with the true resource
-       * bits, and loads to Dalvik registers, constant pools, and non-alias
-       * locations are safe to be hoisted. So only mark the heap references
-       * conservatively here.
-       */
-      if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) {
-        stop_use_all_mask.SetBits(GetPCUseDefEncoding());
-      }
+    /*
+     * Branches for null/range checks are marked with the true resource
+     * bits, and loads to Dalvik registers, constant pools, and non-alias
+     * locations are safe to be hoisted. So only mark the heap references
+     * conservatively here.
+     *
+     * Note: on x86(-64) and Arm64 this will add kEncodeNone.
+     * TODO: Sanity check. LoadStoreElimination uses kBranchBit to fake a PC.
+     */
+    if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) {
+      stop_use_all_mask.SetBits(GetPCUseDefEncoding());
     }
 
     /* Similar as above, but just check for pure register dependency */
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 025f97a..4a06086 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -47,6 +47,8 @@
                           OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                               RegStorage r_src, OpSize size) OVERRIDE;
+    LIR* GenAtomic64Load(RegStorage r_base, int displacement, RegStorage r_dest);
+    LIR* GenAtomic64Store(RegStorage r_base, int displacement, RegStorage r_src);
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
@@ -83,8 +85,6 @@
     size_t GetInsnSize(LIR* lir) OVERRIDE;
     bool IsUnconditionalBranch(LIR* lir);
 
-    // Check support for volatile load/store of a given size.
-    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
     // Get the register class for load/store of a field.
     RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
@@ -192,6 +192,13 @@
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
 
+    bool WideGPRsAreAliases() OVERRIDE {
+      return false;  // Wide GPRs are formed by pairing.
+    }
+    bool WideFPRsAreAliases() OVERRIDE {
+      return false;  // Wide FPRs are formed by pairing.
+    }
+
   private:
     void ConvertShortToLongBranch(LIR* lir);
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index a5b7824..4ba94c4 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -496,6 +496,39 @@
   return inst;
 }
 
+LIR* MipsMir2Lir::GenAtomic64Load(RegStorage r_base, int displacement, RegStorage r_dest) {
+  DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadStore().
+  DCHECK(r_dest.IsPair());
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage reg_ptr = TargetReg(kArg0);
+  OpRegRegImm(kOpAdd, reg_ptr, r_base, displacement);
+  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pA64Load));
+  LIR *ret = OpReg(kOpBlx, r_tgt);
+  RegStorage reg_ret = RegStorage::MakeRegPair(TargetReg(kRet0), TargetReg(kRet1));
+  OpRegCopyWide(r_dest, reg_ret);
+  return ret;
+}
+
+LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStorage r_src) {
+  DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadStore().
+  DCHECK(r_src.IsPair());
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage temp_ptr = AllocTemp();
+  OpRegRegImm(kOpAdd, temp_ptr, r_base, displacement);
+  RegStorage temp_value = AllocTempWide();
+  OpRegCopyWide(temp_value, r_src);
+  RegStorage reg_ptr = TargetReg(kArg0);
+  OpRegCopy(reg_ptr, temp_ptr);
+  RegStorage reg_value = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+  OpRegCopyWide(reg_value, temp_value);
+  FreeTemp(temp_ptr);
+  FreeTemp(temp_value);
+  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pA64Store));
+  return OpReg(kOpBlx, r_tgt);
+}
+
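// Editorial sketch (not part of this patch): the contract the pA64Load /
// pA64Store entrypoints are assumed to provide on MIPS32 — atomic 64-bit
// memory access. A portable, mutex-based stand-in; the actual runtime
// implementation may differ.
#include <cstdint>
#include <mutex>

static std::mutex g_atomic64_lock_sketch;  // Hypothetical global lock.

int64_t A64LoadSketch(const volatile int64_t* addr) {
  std::lock_guard<std::mutex> lock(g_atomic64_lock_sketch);
  return *addr;
}

void A64StoreSketch(volatile int64_t* addr, int64_t value) {
  std::lock_guard<std::mutex> lock(g_atomic64_lock_sketch);
  *addr = value;
}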
 void MipsMir2Lir::SpillCoreRegs() {
   if (num_core_spills_ == 0) {
     return;
@@ -530,17 +563,12 @@
   return (lir->opcode == kMipsB);
 }
 
-bool MipsMir2Lir::SupportsVolatileLoadStore(OpSize size) {
-  // No support for 64-bit atomic load/store on mips.
-  return size != k64 && size != kDouble;
-}
-
 RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
   if (UNLIKELY(is_volatile)) {
-    // On Mips, atomic 64-bit load/store requires an fp register.
+    // On Mips, atomic 64-bit load/store requires a core register.
     // Smaller aligned load/store is atomic for both core and fp registers.
     if (size == k64 || size == kDouble) {
-      return kFPReg;
+      return kCoreReg;
     }
   }
   // TODO: Verify that both core and fp registers are suitable for smaller sizes.
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 129a696..0e8188b 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -551,8 +551,9 @@
 
 LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
-  if (is_volatile == kVolatile) {
-    DCHECK(size != k64 && size != kDouble);
+  if (UNLIKELY(is_volatile == kVolatile && (size == k64 || size == kDouble))) {
+    // Do atomic 64-bit load.
+    return GenAtomic64Load(r_base, displacement, r_dest);
   }
 
   // TODO: base this on target.
@@ -563,10 +564,7 @@
   load = LoadBaseDispBody(r_base, displacement, r_dest, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // Without context sensitive analysis, we must issue the most conservative barriers.
-    // In this case, either a load or store may follow so we issue both barriers.
-    GenMemBarrier(kLoadLoad);
-    GenMemBarrier(kLoadStore);
+    GenMemBarrier(kLoadAny);
   }
 
   return load;
@@ -657,21 +655,26 @@
 LIR* MipsMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
   if (is_volatile == kVolatile) {
-    DCHECK(size != k64 && size != kDouble);
-    // There might have been a store before this volatile one so insert StoreStore barrier.
-    GenMemBarrier(kStoreStore);
+    // Ensure that prior accesses become visible to other threads first.
+    GenMemBarrier(kAnyStore);
   }
 
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   LIR* store;
-  store = StoreBaseDispBody(r_base, displacement, r_src, size);
+  if (UNLIKELY(is_volatile == kVolatile && (size == k64 || size == kDouble))) {
+    // Do atomic 64-bit store.
+    store = GenAtomic64Store(r_base, displacement, r_src);
+  } else {
+    // TODO: base this on target.
+    if (size == kWord) {
+      size = k32;
+    }
+    store = StoreBaseDispBody(r_base, displacement, r_src, size);
+  }
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // A load might follow the volatile store so insert a StoreLoad barrier.
-    GenMemBarrier(kStoreLoad);
+    // Preserve order with respect to any subsequent volatile loads.
+    // We need StoreLoad, but that generally requires the most expensive barrier.
+    GenMemBarrier(kAnyAny);
   }
 
   return store;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index caadc0a..ed7fcdd 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -18,7 +18,6 @@
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "mir_to_lir-inl.h"
-#include "object_utils.h"
 #include "thread-inl.h"
 
 namespace art {
@@ -227,9 +226,6 @@
   bool wide = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE));
   bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
   OpSize size = LoadStoreOpSize(wide, ref);
-  if (data.is_volatile && !SupportsVolatileLoadStore(size)) {
-    return false;
-  }
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
@@ -274,9 +270,6 @@
   bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE));
   bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
   OpSize size = LoadStoreOpSize(wide, ref);
-  if (data.is_volatile && !SupportsVolatileLoadStore(size)) {
-    return false;
-  }
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
@@ -1185,7 +1178,7 @@
       work_half->meta.throw_insn = mir;
     }
 
-    if (MIRGraph::IsPseudoMirOp(opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       HandleExtendedMethodMIR(bb, mir);
       continue;
     }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4885501..5b56633 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -531,7 +531,7 @@
       LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
                   LIR* cont = nullptr) :
         m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
-          m2l->StartSlowPath(cont);
+          m2l->StartSlowPath(this);
       }
       virtual ~LIRSlowPath() {}
       virtual void Compile() = 0;
@@ -705,17 +705,17 @@
     int AssignLiteralOffset(CodeOffset offset);
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
-    LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
+    virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
 
-    virtual void StartSlowPath(LIR *label) {}
+    virtual void StartSlowPath(LIRSlowPath* slowpath) {}
     virtual void BeginInvoke(CallInfo* info) {}
     virtual void EndInvoke(CallInfo* info) {}
 
 
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
-    RegLocation NarrowRegLoc(RegLocation loc);
+    virtual RegLocation NarrowRegLoc(RegLocation loc);
 
     // Shared by all targets - implemented in local_optimizations.cc
     void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
@@ -763,7 +763,7 @@
     virtual bool IsTemp(RegStorage reg);
     bool IsPromoted(RegStorage reg);
     bool IsDirty(RegStorage reg);
-    void LockTemp(RegStorage reg);
+    virtual void LockTemp(RegStorage reg);
     void ResetDef(RegStorage reg);
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
@@ -838,6 +838,7 @@
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
+    virtual void GenImplicitNullCheck(RegStorage reg, int opt_flags);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src,
@@ -982,6 +983,7 @@
      */
     RegLocation InlineTargetWide(CallInfo* info);
 
+    bool GenInlinedGet(CallInfo* info);
     bool GenInlinedCharAt(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
     virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
@@ -1147,10 +1149,12 @@
      * @param base_reg The register holding the base address.
      * @param offset The offset from the base.
      * @param check_value The immediate to compare to.
+     * @param target The branch target (or nullptr).
+     * @param compare Output parameter receiving the comparison LIR (or nullptr).
      * @returns The branch instruction that was generated.
      */
     virtual LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target);
+                                   int offset, int check_value, LIR* target, LIR** compare);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1198,29 +1202,28 @@
     /**
      * @brief Portable way of getting special registers from the backend.
      * @param reg Enumeration describing the purpose of the register.
-     * @param is_wide Whether the view should be 64-bit (rather than 32-bit).
+     * @param wide_kind What kind of view of the special register is required.
      * @return Return the #RegStorage corresponding to the given purpose @p reg.
+     *
+     * @note On a 32-bit system, wide (kWide) views only make sense for the argument registers
+     *       and the return value. In that case, this function should return a pair where the
+     *       first component of the result will be the indicated special register.
      */
-    virtual RegStorage TargetReg(SpecialTargetRegister reg, bool is_wide) {
-      return TargetReg(reg);
-    }
-
-    /**
-     * @brief Portable way of getting special register pair from the backend.
-     * @param reg Enumeration describing the purpose of the first register.
-     * @param reg Enumeration describing the purpose of the second register.
-     * @return Return the #RegStorage corresponding to the given purpose @p reg.
-     */
-    virtual RegStorage TargetReg(SpecialTargetRegister reg1, SpecialTargetRegister reg2) {
-      return RegStorage::MakeRegPair(TargetReg(reg1, false), TargetReg(reg2, false));
-    }
-
-    /**
-     * @brief Portable way of getting a special register for storing a reference.
-     * @see TargetReg()
-     */
-    virtual RegStorage TargetRefReg(SpecialTargetRegister reg) {
-      return TargetReg(reg);
+    virtual RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
+      if (wide_kind == kWide) {
+        DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg7) || (kRet0 == reg));
+        COMPILE_ASSERT((kArg1 == kArg0 + 1) && (kArg2 == kArg1 + 1) && (kArg3 == kArg2 + 1) &&
+                       (kArg4 == kArg3 + 1) && (kArg5 == kArg4 + 1) && (kArg6 == kArg5 + 1) &&
+                       (kArg7 == kArg6 + 1), kargs_range_unexpected);
+        COMPILE_ASSERT((kFArg1 == kFArg0 + 1) && (kFArg2 == kFArg1 + 1) && (kFArg3 == kFArg2 + 1) &&
+                       (kFArg4 == kFArg3 + 1) && (kFArg5 == kFArg4 + 1) && (kFArg6 == kFArg5 + 1) &&
+                       (kFArg7 == kFArg6 + 1), kfargs_range_unexpected);
+        COMPILE_ASSERT(kRet1 == kRet0 + 1, kret_range_unexpected);
+        return RegStorage::MakeRegPair(TargetReg(reg),
+                                       TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
+      } else {
+        return TargetReg(reg);
+      }
     }
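    // Editorial sketch (not part of this patch): typical call-site uses of
    // the WideKind overload. On a 32-bit target, kWide yields a register
    // pair (e.g. {kArg2, kArg3}); kRef and kNotWide yield a single view:
    //
    //   RegStorage ref_reg  = cg->TargetReg(kArg1, kRef);      // reference-sized view
    //   RegStorage narrow   = cg->TargetReg(kArg0, kNotWide);  // 32-bit view
    //   RegStorage wide_reg = cg->TargetReg(kArg2, kWide);     // pair on 32-bit targets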
 
     /**
@@ -1234,9 +1237,9 @@
     // Get a reg storage corresponding to the wide & ref flags of the reg location.
     virtual RegStorage TargetReg(SpecialTargetRegister reg, RegLocation loc) {
       if (loc.ref) {
-        return TargetRefReg(reg);
+        return TargetReg(reg, kRef);
       } else {
-        return TargetReg(reg, loc.wide);
+        return TargetReg(reg, loc.wide ? kWide : kNotWide);
       }
     }
 
@@ -1263,13 +1266,14 @@
     virtual const char* GetTargetInstFmt(int opcode) = 0;
     virtual const char* GetTargetInstName(int opcode) = 0;
     virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
+
+    // Note: This may return kEncodeNone on architectures that do not expose a PC. The caller must
+    //       take care of this.
     virtual ResourceMask GetPCUseDefEncoding() const = 0;
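    // Editorial sketch (not part of this patch): the caller-side pattern for
    // the kEncodeNone case, mirroring the local_optimizations.cc hunk above:
    //
    //   ResourceMask pc_encoding = GetPCUseDefEncoding();
    //   if (pc_encoding == kEncodeNone) {
    //     pc_encoding = ResourceMask::Bit(kIsBranch);  // Fake a PC bit.
    //   }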
     virtual uint64_t GetTargetInstFlags(int opcode) = 0;
     virtual size_t GetInsnSize(LIR* lir) = 0;
     virtual bool IsUnconditionalBranch(LIR* lir) = 0;
 
-    // Check support for volatile load/store of a given size.
-    virtual bool SupportsVolatileLoadStore(OpSize size) = 0;
     // Get the register class for load/store of a field.
     virtual RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) = 0;
 
@@ -1635,6 +1639,17 @@
      */
     virtual void GenConst(RegLocation rl_dest, int value);
 
+    /**
+     * Returns true iff wide GPRs are just different views on the same physical register.
+     */
+    virtual bool WideGPRsAreAliases() = 0;
+
+    /**
+     * Returns true iff wide FPRs are just different views on the same physical register.
+     */
+
+
     enum class WidenessCheck {  // private
       kIgnoreWide,
       kCheckWide,
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index e8fc919..fa1c36e 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -1157,20 +1157,23 @@
     int use_count = mir_graph_->GetUseCount(i);
     if (loc.fp) {
       if (loc.wide) {
-        // Treat doubles as a unit, using upper half of fp_counts array.
-        counts[p_map_idx + num_regs].count += use_count;
+        if (WideFPRsAreAliases()) {
+          // Floats and doubles can be counted together.
+          counts[p_map_idx].count += use_count;
+        } else {
+          // Treat doubles as a unit, using upper half of fp_counts array.
+          counts[p_map_idx + num_regs].count += use_count;
+        }
         i++;
       } else {
         counts[p_map_idx].count += use_count;
       }
     } else if (!IsInexpensiveConstant(loc)) {
-      if (loc.wide && cu_->target64) {
-        // Treat long as a unit, using upper half of core_counts array.
-        counts[p_map_idx + num_regs].count += use_count;
+      if (loc.wide && WideGPRsAreAliases()) {
+        // Longs and doubles can be counted together.
         i++;
-      } else {
-        counts[p_map_idx].count += use_count;
       }
+      counts[p_map_idx].count += use_count;
     }
   }
 }
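// Editorial sketch (not part of this patch): the FP counting rule the hunk
// above implements. Where wide FPRs alias narrow ones, a double competes for
// the same promotion slot as a float; on pairing targets it is tallied in
// the upper half of the counts array. Wide GPRs, by contrast, are counted
// once when aliasing and once per half when formed by pairing.
static int FpPromotionSlotSketch(bool wide, bool fprs_are_aliases, int p_map_idx, int num_regs) {
  return (wide && !fprs_are_aliases) ? (p_map_idx + num_regs) : p_map_idx;
}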
diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h
index 12ce98a..436cdb5 100644
--- a/compiler/dex/quick/resource_mask.h
+++ b/compiler/dex/quick/resource_mask.h
@@ -63,6 +63,11 @@
   ResourceMask(const ResourceMask& other) = default;
   ResourceMask& operator=(const ResourceMask& other) = default;
 
+  // Comparable by content.
+  bool operator==(const ResourceMask& other) const {
+    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
+  }
+
   static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) {
     return ResourceMask(mask1, mask2);
   }
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 8df5b6d..ebe3f0a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -271,21 +271,22 @@
   { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
   { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
   { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
   { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
   { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
+  { kX86Test32RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!2d]" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
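
The new kX86Test32RM entry reuses opcode 0x85 (test r/m32, r32) with a register-memory operand order; test performs the load but never writes memory, which is exactly what an implicit probe wants. A toy encoder for the simple [base + disp8] form, ignoring the ESP/EBP special cases that need a SIB byte or a different mod (illustrative only, not the ART assembler):

    #include <cstdint>
    #include <vector>

    // Encode "test r32, [base + disp8]"; reg and base are x86 register
    // numbers 0-7. mod=01 in the ModRM byte selects an 8-bit displacement.
    static std::vector<uint8_t> EncodeTest32RM(int reg, int base, int8_t disp) {
      uint8_t modrm = static_cast<uint8_t>((0x1 << 6) | (reg << 3) | base);
      return {0x85, modrm, static_cast<uint8_t>(disp)};
    }
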
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..40dd9cc 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -151,7 +151,7 @@
 
   // Making a call - use explicit registers
   FlushAllRegs();   /* Everything to home location */
-  RegStorage array_ptr = TargetRefReg(kArg0);
+  RegStorage array_ptr = TargetReg(kArg0, kRef);
   RegStorage payload = TargetPtrReg(kArg1);
   RegStorage method_start = TargetPtrReg(kArg2);
 
@@ -222,20 +222,32 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
 
-  /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
-
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-      !IsLargeFrame(frame_size_, cu_->target64 ? kX86_64 : kX86);
+  InstructionSet isa = cu_->target64 ? kX86_64 : kX86;
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
+
+  // If we are doing an implicit stack overflow check, perform the load immediately
+  // before the stack pointer is decremented and anything is saved.
+  if (!skip_overflow_check &&
+      cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+    // Implicit stack overflow check.
+    // test eax,[esp + -overflow]
+    int overflow = GetStackOverflowReservedBytes(isa);
+    NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rX86_SP.GetReg(), -overflow);
+    MarkPossibleStackOverflowException();
+  }
+
+  /* Build frame, return address already on stack */
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ -
+                              GetInstructionSetPointerSize(cu_->instruction_set));
+
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
-  /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
-  DCHECK_EQ(num_fp_spills_, 0);
+  SpillFPRegs();
   if (!skip_overflow_check) {
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
@@ -261,25 +273,27 @@
      private:
       const size_t sp_displace_;
     };
-    // TODO: for large frames we should do something like:
-    // spill ebp
-    // lea ebp, [esp + frame_size]
-    // cmp ebp, fs:[stack_end_]
-    // jcc stack_overflow_exception
-    // mov esp, ebp
-    // in case a signal comes in that's not using an alternate signal stack and the large frame may
-    // have moved us outside of the reserved area at the end of the stack.
-    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    if (cu_->target64) {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
-    } else {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
-    }
-    LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+      // TODO: for large frames we should do something like:
+      // spill ebp
+      // lea ebp, [esp + frame_size]
+      // cmp ebp, fs:[stack_end_]
+      // jcc stack_overflow_exception
+      // mov esp, ebp
+      // in case a signal comes in that's not using an alternate signal stack and the large frame
+      // may have moved us outside of the reserved area at the end of the stack.
+      // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+      if (cu_->target64) {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
+      } else {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
+      }
+      LIR* branch = OpCondBranch(kCondUlt, nullptr);
+      AddSlowPath(
         new(arena_)StackOverflowSlowPath(this, branch,
                                          frame_size_ -
                                          GetInstructionSetPointerSize(cu_->instruction_set)));
+    }
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -309,6 +323,7 @@
 
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
+  UnSpillFPRegs();
   /* Remove frame except for return address */
   stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
   NewLIR0(kX86Ret);
@@ -318,4 +333,14 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  // Implicit null pointer check.
+  // test eax,[arg1+0]
+  NewLIR3(kX86Test32RM, rs_rAX.GetReg(), reg.GetReg(), 0);
+  MarkPossibleNullPointerException(opt_flags);
+}
+
 }  // namespace art
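
Both GenImplicitNullCheck above and the implicit stack-overflow probe in GenEntrySequence follow the same pattern: issue a load that changes no architectural state (test reads memory but writes only flags) at an address that is invalid exactly in the error case, and let the runtime's SIGSEGV handler convert the fault into the matching exception. A POSIX-only, user-space illustration of the probing idea (sigsetjmp stands in for ART's fault manager; a real handler would use sigaction and check the faulting PC):

    #include <setjmp.h>
    #include <signal.h>
    #include <cstdio>

    static sigjmp_buf g_jump_buf;

    static void SegvHandler(int) {
      siglongjmp(g_jump_buf, 1);  // Stand-in for raising an exception.
    }

    // Probe an address the way an implicit null check does: just read it.
    static bool ProbeReadable(const volatile int* p) {
      signal(SIGSEGV, SegvHandler);
      if (sigsetjmp(g_jump_buf, 1) != 0) {
        return false;  // The "exception" path taken after the fault.
      }
      (void)*p;  // The probing load; faults iff p is unmapped.
      return true;
    }

    int main() {
      int x = 42;
      std::printf("%d %d\n", ProbeReadable(&x), ProbeReadable(nullptr));
      return 0;
    }
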
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..cf4521a 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -85,23 +85,31 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size) OVERRIDE;
   void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  void GenImplicitNullCheck(RegStorage reg, int opt_flags);
 
   // Required for target - register utilities.
   RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
   RegStorage TargetReg32(SpecialTargetRegister reg);
-  RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE {
-    RegStorage reg = TargetReg32(symbolic_reg);
-    if (is_wide) {
-      return (reg.Is64Bit()) ? reg : As64BitReg(reg);
+  RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE {
+    if (wide_kind == kWide) {
+      if (cu_->target64) {
+        return As64BitReg(TargetReg32(symbolic_reg));
+      } else {
+        // x86: construct a pair.
+        DCHECK((kArg0 <= symbolic_reg && symbolic_reg < kArg3) ||
+               (kFArg0 <= symbolic_reg && symbolic_reg < kFArg3) ||
+               (kRet0 == symbolic_reg));
+        return RegStorage::MakeRegPair(TargetReg32(symbolic_reg),
+                                 TargetReg32(static_cast<SpecialTargetRegister>(symbolic_reg + 1)));
+      }
+    } else if (wide_kind == kRef && cu_->target64) {
+      return As64BitReg(TargetReg32(symbolic_reg));
     } else {
-      return (reg.Is32Bit()) ? reg : As32BitReg(reg);
+      return TargetReg32(symbolic_reg);
     }
   }
-  RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-    return TargetReg(symbolic_reg, cu_->target64);
-  }
   RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
-    return TargetReg(symbolic_reg, cu_->target64);
+    return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide);
   }
   RegStorage GetArgMappingToPhysicalReg(int arg_num);
   RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
@@ -137,8 +145,6 @@
   size_t GetInsnSize(LIR* lir) OVERRIDE;
   bool IsUnconditionalBranch(LIR* lir);
 
-  // Check support for volatile load/store of a given size.
-  bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
   // Get the register class for load/store of a field.
   RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
@@ -319,6 +325,8 @@
   void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
   void SpillCoreRegs();
   void UnSpillCoreRegs();
+  void UnSpillFPRegs();
+  void SpillFPRegs();
   static const X86EncodingMap EncodingMap[kX86Last];
   bool InexpensiveConstantInt(int32_t value);
   bool InexpensiveConstantFloat(int32_t value);
@@ -796,9 +804,11 @@
    * @param base_reg The register holding the base address.
    * @param offset The offset from the base.
    * @param check_value The immediate to compare to.
+   * @param target branch target (or nullptr)
+   * @param compare output parameter that receives the comparison LIR (or nullptr)
    */
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target);
+                         int offset, int check_value, LIR* target, LIR** compare);
 
   /*
    * Can this operation be using core registers without temporaries?
@@ -931,6 +941,13 @@
 
   InToRegStorageMapping in_to_reg_storage_mapping_;
 
+  bool WideGPRsAreAliases() OVERRIDE {
+    return cu_->target64;  // On 64b, we have 64b GPRs.
+  }
+  bool WideFPRsAreAliases() OVERRIDE {
+    return true;  // xmm registers have 64b views even on x86.
+  }
+
  private:
   // The number of vector registers [0..N] reserved by a call to ReserveVectorRegisters
   int num_reserved_vector_regs_;
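
The WideKind-based TargetReg above folds the old bool parameter and the separate TargetRefReg into one three-way dispatch. The cases it distinguishes, restated as a standalone sketch (the real RegStorage encoding carries much more information):

    #include <string>

    enum WideKind { kNotWide, kWide, kRef };

    static std::string DescribeTargetReg(WideKind kind, bool target64) {
      if (kind == kWide) {
        return target64 ? "64-bit solo register"
                        : "pair of two consecutive 32-bit registers";
      }
      if (kind == kRef && target64) {
        return "64-bit solo register (references are pointer-sized)";
      }
      return "32-bit solo register";
    }
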
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f1166f6..3f1df18 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -19,6 +19,7 @@
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "mirror/art_method.h"
 #include "mirror/array.h"
 #include "x86_lir.h"
 
@@ -857,11 +858,12 @@
     RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
     RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
     LoadValueDirectWide(rl_src_expected, rs_r0q);
-    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
+    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
+            rl_new_value.reg.GetReg());
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
-    GenMemBarrier(kStoreLoad);
+    GenMemBarrier(kAnyAny);
 
     FreeTemp(rs_r0q);
   } else if (is_long) {
@@ -913,10 +915,11 @@
     }
     NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
 
-    // After a store we need to insert barrier in case of potential load. Since the
-    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
-    GenMemBarrier(kStoreLoad);
-
+    // After a store we need to insert a barrier to prevent reordering with either
+    // earlier or later memory accesses.  Since the locked cmpxchg has full barrier
+    // semantics, only a scheduling barrier will be generated; it is associated with
+    // the cmpxchg instruction, preventing reordering in both directions.
+    GenMemBarrier(kAnyAny);
 
     if (push_si) {
       FreeTemp(rs_rSI);
@@ -952,11 +955,14 @@
       rl_offset = LoadValue(rl_src_offset, kCoreReg);
     }
     LoadValueDirect(rl_src_expected, rs_r0);
-    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
+    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
+            rl_new_value.reg.GetReg());
 
-    // After a store we need to insert barrier in case of potential load. Since the
-    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
-    GenMemBarrier(kStoreLoad);
+    // After a store we need to insert a barrier to prevent reordering with either
+    // earlier or later memory accesses.  Since the locked cmpxchg has full barrier
+    // semantics, only a scheduling barrier will be generated; it is associated with
+    // the cmpxchg instruction, preventing reordering in both directions.
+    GenMemBarrier(kAnyAny);
 
     FreeTemp(rs_r0);
   }
@@ -1065,23 +1071,23 @@
       RegStorage new_index = index_;
       // Move index out of kArg1, either directly to kArg0, or to kArg2.
       // TODO: clean-up to check not a number but with type
-      if (index_ == m2l_->TargetReg(kArg1, false)) {
-        if (array_base_ == m2l_->TargetRefReg(kArg0)) {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, false), index_);
-          new_index = m2l_->TargetReg(kArg2, false);
+      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
+        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
+          new_index = m2l_->TargetReg(kArg2, kNotWide);
         } else {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, false), index_);
-          new_index = m2l_->TargetReg(kArg0, false);
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
+          new_index = m2l_->TargetReg(kArg0, kNotWide);
         }
       }
       // Load array length to kArg1.
-      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
+      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
       if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
-                                      new_index, m2l_->TargetReg(kArg1, false), true);
+                                      new_index, m2l_->TargetReg(kArg1, kNotWide), true);
       } else {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                      new_index, m2l_->TargetReg(kArg1, false), true);
+                                      new_index, m2l_->TargetReg(kArg1, kNotWide), true);
       }
     }
 
@@ -1092,6 +1098,7 @@
   };
 
   OpRegMem(kOpCmp, index, array_base, len_offset);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondUge, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
@@ -1114,14 +1121,16 @@
       GenerateTargetLabel(kPseudoThrowTarget);
 
       // Load array length to kArg1.
-      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
-      m2l_->LoadConstant(m2l_->TargetReg(kArg0, false), index_);
+      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
+      m2l_->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
       if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0, false), m2l_->TargetReg(kArg1, false), true);
+                                      m2l_->TargetReg(kArg0, kNotWide),
+                                      m2l_->TargetReg(kArg1, kNotWide), true);
       } else {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0, false), m2l_->TargetReg(kArg1, false), true);
+                                      m2l_->TargetReg(kArg0, kNotWide),
+                                      m2l_->TargetReg(kArg1, kNotWide), true);
       }
     }
 
@@ -1132,6 +1141,7 @@
   };
 
   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondLs, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
@@ -1467,7 +1477,8 @@
   int displacement = SRegOffset(rl_src.s_reg_low);
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
+  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
+                     r_base, displacement + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
   if (!cu_->target64) {
@@ -2346,8 +2357,9 @@
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
-  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
-    (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
+  int32_t offset_of_type = mirror::Array::DataOffset(
+      sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+      (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
 
   if (rl_method.location == kLocPhysReg) {
     if (use_declaring_class) {
@@ -2395,10 +2407,10 @@
   FlushAllRegs();
   // May generate a call - use explicit registers.
   LockCallTemps();
-  RegStorage method_reg = TargetRefReg(kArg1);  // kArg1 gets current Method*.
+  RegStorage method_reg = TargetReg(kArg1, kRef);  // kArg1 gets current Method*.
   LoadCurrMethodDirect(method_reg);
-  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*.
-  RegStorage ref_reg = TargetRefReg(kArg0);  // kArg2 will hold the ref.
+  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*.
+  RegStorage ref_reg = TargetReg(kArg0, kRef);  // kArg0 will hold the ref.
   // Reference must end up in kArg0.
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
@@ -2410,7 +2422,7 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetRefReg(kRet0));
+    OpRegCopy(class_reg, TargetReg(kRet0, kRef));
     LoadValueDirectFixed(rl_src, ref_reg);
   } else if (use_declaring_class) {
     LoadValueDirectFixed(rl_src, ref_reg);
@@ -2434,7 +2446,7 @@
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
       }
-      OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path.
+      OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path.
       LoadValueDirectFixed(rl_src, ref_reg);  /* Reload Ref. */
       // Rejoin code paths
       LIR* hop_target = NewLIR0(kPseudoTargetLabel);
@@ -2455,7 +2467,7 @@
   // Is the class NULL?
   LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, NULL);
 
-  RegStorage ref_class_reg = TargetRefReg(kArg1);  // kArg2 will hold the Class*.
+  RegStorage ref_class_reg = TargetReg(kArg1, kRef);  // kArg1 will hold the Class*.
   /* Load object->klass_. */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
   LoadRefDisp(ref_reg,  mirror::Object::ClassOffset().Int32Value(), ref_class_reg,
@@ -2473,7 +2485,7 @@
       LoadConstant(rl_result.reg, 1);     // Assume result succeeds.
       branchover = OpCmpBranch(kCondEq, ref_class_reg, class_reg, NULL);
     }
-    OpRegCopy(TargetRefReg(kArg0), class_reg);
+    OpRegCopy(TargetReg(kArg0, kRef), class_reg);
     if (cu_->target64) {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
     } else {
@@ -2613,7 +2625,7 @@
   } else {
     if (shift_op) {
       // X86 doesn't require masking and must use ECX.
-      RegStorage t_reg = TargetReg(kCount, false);  // rCX
+      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
       LoadValueDirectFixed(rl_rhs, t_reg);
       if (is_two_addr) {
         // Can we do this directly into memory?
@@ -2801,7 +2813,7 @@
   }
 
   // X86 doesn't require masking and must use ECX.
-  RegStorage t_reg = TargetReg(kCount, false);  // rCX
+  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
   LoadValueDirectFixed(rl_shift, t_reg);
   if (is_two_addr) {
     // Can we do this directly into memory?
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..06001d7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -52,6 +52,13 @@
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 };
+static constexpr RegStorage xp_regs_arr_32[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static constexpr RegStorage xp_regs_arr_64[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+};
 static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
@@ -60,6 +67,24 @@
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
     rs_r8, rs_r9, rs_r10, rs_r11
 };
+
+// How to make a register available for promotion:
+// 1) Remove the register from the array defining the temps
+// 2) Update ClobberCallerSave
+// 3) Update the JNI compiler ABI:
+// 3.1) Add the reg in the JniCallingConvention method
+// 3.2) Update CoreSpillMask/FpSpillMask
+// 4) Update the entrypoints:
+// 4.1) Update constants in asm_support_x86_64.h for the new frame size
+// 4.2) Remove the entry in SmashCallerSaves
+// 4.3) Update jni_entrypoints to spill/unspill the new callee save reg
+// 4.4) Update quick_entrypoints to spill/unspill the new callee save reg
+// 5) Update the runtime ABI:
+// 5.1) Update quick_method_frame_info with the new required spills
+// 5.2) Update QuickArgumentVisitor with the new offsets to GPRs and XMMs
+// Note that you cannot use a register corresponding to the incoming args
+// per the ABI, and QCG needs one additional XMM temp for the bulk copy
+// done in preparation for a call.
 static constexpr RegStorage core_temps_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
     rs_r8q, rs_r9q, rs_r10q, rs_r11q
@@ -69,14 +94,14 @@
 };
 static constexpr RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
+    rs_fr8, rs_fr9, rs_fr10, rs_fr11
 };
 static constexpr RegStorage dp_temps_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
+    rs_dr8, rs_dr9, rs_dr10, rs_dr11
 };
 
 static constexpr RegStorage xp_temps_arr_32[] = {
@@ -84,7 +109,7 @@
 };
 static constexpr RegStorage xp_temps_arr_64[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+    rs_xr8, rs_xr9, rs_xr10, rs_xr11
 };
 
 static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -95,6 +120,8 @@
 static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
@@ -177,7 +204,8 @@
     case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
     case kLr: res_reg =  RegStorage::InvalidReg(); break;
     case kPc: res_reg =  RegStorage::InvalidReg(); break;
-    case kSp: res_reg =  rs_rX86_SP; break;
+    case kSp: res_reg =  rs_rX86_SP_32; break;  // This must be the concrete register, as
+                                                // _SP has a target-specific size.
     case kArg0: res_reg = rs_rX86_ARG0; break;
     case kArg1: res_reg = rs_rX86_ARG1; break;
     case kArg2: res_reg = rs_rX86_ARG2; break;
@@ -219,11 +247,6 @@
 }
 
 ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
-  /*
-   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
-   * able to clean up some of the x86/Arm_Mips differences
-   */
-  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
   return kEncodeNone;
 }
 
@@ -437,21 +460,13 @@
 
 /* Clobber all regs that might be used by an external C call */
 void X86Mir2Lir::ClobberCallerSave() {
-  Clobber(rs_rAX);
-  Clobber(rs_rCX);
-  Clobber(rs_rDX);
-  Clobber(rs_rBX);
-
-  Clobber(rs_fr0);
-  Clobber(rs_fr1);
-  Clobber(rs_fr2);
-  Clobber(rs_fr3);
-  Clobber(rs_fr4);
-  Clobber(rs_fr5);
-  Clobber(rs_fr6);
-  Clobber(rs_fr7);
-
   if (cu_->target64) {
+    Clobber(rs_rAX);
+    Clobber(rs_rCX);
+    Clobber(rs_rDX);
+    Clobber(rs_rSI);
+    Clobber(rs_rDI);
+
     Clobber(rs_r8);
     Clobber(rs_r9);
     Clobber(rs_r10);
@@ -461,11 +476,21 @@
     Clobber(rs_fr9);
     Clobber(rs_fr10);
     Clobber(rs_fr11);
-    Clobber(rs_fr12);
-    Clobber(rs_fr13);
-    Clobber(rs_fr14);
-    Clobber(rs_fr15);
+  } else {
+    Clobber(rs_rAX);
+    Clobber(rs_rCX);
+    Clobber(rs_rDX);
+    Clobber(rs_rBX);
   }
+
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
 }
 
 RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -553,11 +578,11 @@
 
   bool ret = false;
   /*
-   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
-   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
-   * to ensure is that there is a scheduling barrier in place.
+   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
-  if (barrier_kind == kStoreLoad) {
+  if (barrier_kind == kAnyAny) {
     // If no LIR already exists that can be used as a barrier, then generate an mfence.
     if (mem_barrier == nullptr) {
       mem_barrier = NewLIR0(kX86Mfence);
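
The rename from kStoreLoad to kAnyAny tracks the barrier-kind cleanup elsewhere in this change: on x86's TSO model only the StoreLoad/AnyAny ordering costs a real fence, and everything else merely pins instruction scheduling. A one-function restatement of that rule (enum values assumed to match the new kinds):

    enum MemBarrierKind { kAnyStore, kLoadAny, kStoreStore, kAnyAny };

    // Only a full (AnyAny) barrier needs an mfence on x86; the rest
    // are compiler-scheduling barriers with no machine instruction.
    static bool NeedsMfenceOnX86(MemBarrierKind kind) {
      return kind == kAnyAny;
    }
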
@@ -599,11 +624,15 @@
   // Target-specific adjustments.
 
   // Add in XMM registers.
-  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
-  for (RegStorage reg : *xp_temps) {
+  const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
+  for (RegStorage reg : *xp_regs) {
     RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
     reginfo_map_.Put(reg.GetReg(), info);
-    info->SetIsTemp(true);
+  }
+  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
+  for (RegStorage reg : *xp_temps) {
+    RegisterInfo* xp_reg_info = GetRegInfo(reg);
+    xp_reg_info->SetIsTemp(true);
   }
 
   // Alias single precision xmm to double xmms.
@@ -665,9 +694,11 @@
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
   int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+  OpSize size = cu_->target64 ? k64 : k32;
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+      StoreBaseDisp(rs_rX86_SP, offset,
+                    cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+                    size, kNotVolatile);
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
@@ -680,20 +711,48 @@
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
   int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+  OpSize size = cu_->target64 ? k64 : k32;
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+      LoadBaseDisp(rs_rX86_SP, offset,
+                   cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+                   size, kNotVolatile);
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
 
-bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
+void X86Mir2Lir::SpillFPRegs() {
+  if (num_fp_spills_ == 0) {
+    return;
+  }
+  uint32_t mask = fp_spill_mask_;
+  int offset = frame_size_ -
+      (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+  for (int reg = 0; mask; mask >>= 1, reg++) {
+    if (mask & 0x1) {
+      StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+                    k64, kNotVolatile);
+      offset += sizeof(double);
+    }
+  }
+}
+
+void X86Mir2Lir::UnSpillFPRegs() {
+  if (num_fp_spills_ == 0) {
+    return;
+  }
+  uint32_t mask = fp_spill_mask_;
+  int offset = frame_size_ -
+      (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+  for (int reg = 0; mask; mask >>= 1, reg++) {
+    if (mask & 0x1) {
+      LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+                   k64, kNotVolatile);
+      offset += sizeof(double);
+    }
+  }
 }
 
-bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
-  return true;
+  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
 }
 
 RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
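
The FP spill area sits directly below the core spill slots, and each slot holds a full 64-bit double on both 32- and 64-bit targets. The base-offset arithmetic used by SpillFPRegs/UnSpillFPRegs, isolated for clarity (illustrative helper, not ART code):

    #include <cstddef>

    // First FP spill slot: below the core spills, which occupy
    // ptr_size bytes each at the top of the frame. Slots then
    // advance by sizeof(double), mirroring the loops above.
    static size_t FirstFpSpillOffset(size_t frame_size, size_t ptr_size,
                                     size_t num_core_spills, size_t num_fp_spills) {
      return frame_size - ptr_size * (num_fp_spills + num_core_spills);
    }
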
@@ -825,8 +884,12 @@
 }
 
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
-  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
-  return nullptr;
+  // First load the pointer from fs:[suspend-trigger] into eax.
+  // Then use a test instruction to indirect via that address.
+  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(), cu_->target64 ?
+      Thread::ThreadSuspendTriggerOffset<8>().Int32Value() :
+      Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
 }
 
 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
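
CheckSuspendUsingLoad works because the runtime keeps a per-thread suspend-trigger pointer: normally it points at readable memory, and when a suspend is requested it is redirected to an unmapped address, so the probe's load faults into the suspend path. A minimal sketch of the mechanism (field names hypothetical):

    #include <cstdint>

    struct ThreadSketch {
      uintptr_t safe_word = 0;
      // Normally points at readable memory owned by the thread.
      uintptr_t* suspend_trigger = &safe_word;
    };

    // Requesting a suspend just makes the trigger unreadable, so the
    // next "mov eax, fs:[trigger]; test eax, [eax]" sequence faults.
    static void RequestSuspend(ThreadSketch* t) {
      t->suspend_trigger = nullptr;  // Any unmapped address works.
    }
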
@@ -906,7 +969,8 @@
   uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
 
   // Generate the move instruction with the unique pointer and save index, dex_file, and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
+                     TargetReg(symbolic_reg, kNotWide).GetReg(),
                      static_cast<int>(target_method_id_ptr), target_method_idx,
                      WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
   AppendLIR(move);
@@ -923,7 +987,8 @@
   uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
 
   // Generate the move instruction with the unique pointer and save index and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
+                     TargetReg(symbolic_reg, kNotWide).GetReg(),
                      static_cast<int>(ptr), type_idx);
   AppendLIR(move);
   class_type_address_insns_.Insert(move);
@@ -1189,6 +1254,7 @@
   // Is the string non-NULL?
   LoadValueDirectFixed(rl_obj, rs_rDX);
   GenNullCheck(rs_rDX, info->opt_flags);
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
 
   // Does the character fit in 16 bits?
@@ -1215,12 +1281,20 @@
   // Character is in EAX.
   // Object pointer is in EDX.
 
+  // Compute the number of words to search in, placing it in rCX.
+  Load32Disp(rs_rDX, count_offset, rs_rCX);
+
+  // Possible signal here due to null pointer dereference.
+  // Note that the signal handler will expect the top word of
+  // the stack to be the ArtMethod*.  If the PUSH edi instruction
+  // below is ahead of the load above then this will not be true
+  // and the signal handler will not work.
+  MarkPossibleNullPointerException(0);
+
   // We need to preserve EDI, but have no spare registers, so push it on the stack.
   // We have to remember that all stack addresses after this are offset by sizeof(EDI).
   NewLIR1(kX86Push32R, rs_rDI.GetReg());
 
-  // Compute the number of words to search in to rCX.
-  Load32Disp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
   bool is_index_on_stack = false;
@@ -1259,7 +1333,8 @@
         }
       } else {
         // Load the start index from stack, remembering that we pushed EDI.
-        int displacement = SRegOffset(rl_start.s_reg_low) + (cu_->target64 ? 2 : 1) * sizeof(uint32_t);
+        int displacement = SRegOffset(rl_start.s_reg_low) +
+                           (cu_->target64 ? 2 : 1) * sizeof(uint32_t);
         {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           Load32Disp(rs_rX86_SP, displacement, tmpReg);
@@ -2179,21 +2254,24 @@
 }
 
 // ------------ ABI support: mapping of args to physical registers -------------
-RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) {
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide,
+                                                              bool is_ref) {
   const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
-  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
+  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) /
+      sizeof(SpecialTargetRegister);
   const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
-                                                  kFArg4, kFArg5, kFArg6, kFArg7};
-  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
+                                                             kFArg4, kFArg5, kFArg6, kFArg7};
+  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) /
+      sizeof(SpecialTargetRegister);
 
   if (is_double_or_float) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide);
+      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide ? kWide : kNotWide);
     }
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      return is_ref ? ml_->TargetRefReg(coreArgMappingToPhysicalReg[cur_core_reg_++]) :
-                      ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], is_wide);
+      return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                            is_ref ? kRef : (is_wide ? kWide : kNotWide));
     }
   }
   return RegStorage::InvalidReg();
@@ -2205,7 +2283,8 @@
   return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
 }
 
-void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
+void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
+                                                   InToRegStorageMapper* mapper) {
   DCHECK(mapper != nullptr);
   max_mapped_in_ = -1;
   is_there_stack_mapped_ = false;
@@ -2276,13 +2355,13 @@
 
   RegLocation rl_src = rl_method;
   rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetRefReg(kArg0);
+  rl_src.reg = TargetReg(kArg0, kRef);
   rl_src.home = false;
   MarkLive(rl_src);
   StoreValue(rl_method, rl_src);
   // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(rs_rX86_SP, 0, As32BitReg(TargetRefReg(kArg0)), kNotVolatile);
+    StoreRefDisp(rs_rX86_SP, 0, As32BitReg(TargetReg(kArg0, kRef)), kNotVolatile);
   }
 
   if (cu_->num_ins == 0) {
@@ -2440,7 +2519,8 @@
 
     // The rest can be copied together
     int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
+                                                   cu_->instruction_set);
 
     int current_src_offset = start_offset;
     int current_dest_offset = outs_offset;
@@ -2536,7 +2616,7 @@
 
         // Instead of allocating a new temp, simply reuse one of the registers being used
         // for argument passing.
-        RegStorage temp = TargetReg(kArg3, false);
+        RegStorage temp = TargetReg(kArg3, kNotWide);
 
         // Now load the argument VR and store to the outs.
         Load32Disp(rs_rX86_SP, current_src_offset, temp);
@@ -2552,8 +2632,8 @@
 
   // Now handle rest not registers if they are
   if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regSingle = TargetReg(kArg2, false);
-    RegStorage regWide = TargetReg(kArg3, true);
+    RegStorage regSingle = TargetReg(kArg2, kNotWide);
+    RegStorage regWide = TargetReg(kArg3, kWide);
     for (int i = start_index;
          i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
       RegLocation rl_arg = info->args[i];
@@ -2611,14 +2691,14 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
+      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160f..047a65d 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -684,9 +684,9 @@
     } else {
       DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
       if (r_base == r_dest.GetLow()) {
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
+        load = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
-        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load2 = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
       } else {
         load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
         load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
@@ -712,16 +712,16 @@
         if (r_dest.GetHigh() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
-          load2 = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
           OpRegCopy(r_dest.GetHigh(), temp);
           FreeTemp(temp);
         } else {
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
         }
       } else {
@@ -744,6 +744,7 @@
     }
   }
 
+  // Always return the first load generated, as that is the one that may fault if base is nullptr.
   return load;
 }
 
@@ -762,10 +763,7 @@
                                   size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // Without context sensitive analysis, we must issue the most conservative barriers.
-    // In this case, either a load or store may follow so we issue both barriers.
-    GenMemBarrier(kLoadLoad);
-    GenMemBarrier(kLoadStore);
+    GenMemBarrier(kLoadAny);  // Only a scheduling barrier.
   }
 
   return load;
@@ -863,8 +861,7 @@
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
                                VolatileKind is_volatile) {
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // There might have been a store before this volatile one so insert StoreStore barrier.
-    GenMemBarrier(kStoreStore);
+    GenMemBarrier(kAnyStore);  // Only a scheduling barrier.
   }
 
   // StoreBaseDisp() will emit correct insn for atomic store on x86
@@ -873,17 +870,21 @@
   LIR* store = StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
-    // A load might follow the volatile store so insert a StoreLoad barrier.
-    GenMemBarrier(kStoreLoad);
+    // A volatile load might follow the volatile store so insert a StoreLoad barrier.
+    // This does require a fence, even on x86.
+    GenMemBarrier(kAnyAny);
   }
 
   return store;
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target) {
-    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(), offset,
-            check_value);
+                                   int offset, int check_value, LIR* target, LIR** compare) {
+    LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
+            offset, check_value);
+    if (compare != nullptr) {
+        *compare = inst;
+    }
     LIR* branch = OpCondBranch(cond, target);
     return branch;
 }
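
The new barrier placement around volatile stores matches what a sequentially consistent store costs on x86: nothing before the store (kAnyStore only constrains the scheduler) and a full fence after it (kAnyAny). The same shape in standard C++, for comparison:

    #include <atomic>

    // On x86 this lowers to a plain store plus an mfence (or an
    // xchg-based store), the same cost model as the kAnyStore/kAnyAny
    // pair emitted above.
    static void VolatileStoreSketch(std::atomic<int>* field, int value) {
      field->store(value, std::memory_order_seq_cst);
    }
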
@@ -917,7 +918,7 @@
 
   for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     int opcode = mir->dalvikInsn.opcode;
-    if (MIRGraph::IsPseudoMirOp(opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       AnalyzeExtendedMIR(opcode, bb, mir);
     } else {
       AnalyzeMIR(opcode, bb, mir);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..17f9b91 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -66,7 +66,9 @@
  *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
  *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
  *  ---  x86-64/x32 registers
- *  XMM8 .. 15: caller save available as scratch registers for ART.
+ *  XMM8 .. 11: caller save available as scratch registers for ART.
+ *  XMM12 .. 15: callee save available as promoted registers for ART.
+ *  This change (XMM12..15) applies to QCG only; for other compilers they remain caller save.
  *
  * X87 is a necessary evil outside of ART code for x86:
  *  ST0:  x86 float/double native return value, caller save
@@ -497,6 +499,7 @@
   UnaryOpcode(kX86Test, RI, MI, AI),
   kX86Test32RR,
   kX86Test64RR,
+  kX86Test32RM,
   UnaryOpcode(kX86Not, R, M, A),
   UnaryOpcode(kX86Neg, R, M, A),
   UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
new file mode 100644
index 0000000..03bda78
--- /dev/null
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "quick_compiler_callbacks.h"
+
+#include "quick/dex_file_to_method_inliner_map.h"
+#include "verifier/method_verifier-inl.h"
+#include "verification_results.h"
+
+namespace art {
+
+bool QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
+  bool result = verification_results_->ProcessVerifiedMethod(verifier);
+  if (result) {
+    MethodReference ref = verifier->GetMethodReference();
+    method_inliner_map_->GetMethodInliner(ref.dex_file)
+        ->AnalyseMethodCode(verifier);
+  }
+  return result;
+}
+
+void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
+  verification_results_->AddRejectedClass(ref);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
new file mode 100644
index 0000000..7c9614f
--- /dev/null
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
+#define ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
+
+#include "compiler_callbacks.h"
+
+namespace art {
+
+class VerificationResults;
+class DexFileToMethodInlinerMap;
+
+class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
+  public:
+    QuickCompilerCallbacks(VerificationResults* verification_results,
+                           DexFileToMethodInlinerMap* method_inliner_map)
+        : verification_results_(verification_results),
+          method_inliner_map_(method_inliner_map) {
+      CHECK(verification_results != nullptr);
+      CHECK(method_inliner_map != nullptr);
+    }
+
+    ~QuickCompilerCallbacks() { }
+
+    bool MethodVerified(verifier::MethodVerifier* verifier)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+
+    void ClassRejected(ClassReference ref) OVERRIDE;
+
+  private:
+    VerificationResults* const verification_results_;
+    DexFileToMethodInlinerMap* const method_inliner_map_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
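
Wiring the new class up only requires the two components its constructor CHECKs. A hypothetical construction site (ownership and the VerificationResults constructor signature here are assumptions; in practice this happens in dex2oat's setup code):

    VerificationResults verification_results(&compiler_options);  // assumed ctor
    DexFileToMethodInlinerMap method_inliner_map;
    QuickCompilerCallbacks callbacks(&verification_results, &method_inliner_map);
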
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 8ed3adc..addd628 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_REG_STORAGE_H_
 
 #include "base/logging.h"
+#include "compiler_enums.h"  // For WideKind
 
 namespace art {
 
@@ -149,6 +150,10 @@
     return ((reg_ & k64BitMask) == k64Bits);
   }
 
+  constexpr WideKind GetWideKind() const {
+    return Is64Bit() ? kWide : kNotWide;
+  }
+
   constexpr bool Is64BitSolo() const {
     return ((reg_ & kShapeMask) == k64BitSolo);
   }
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index db383c4..892b302 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -251,7 +251,8 @@
 
     // Special-case handling for format 35c/3rc invokes
     Instruction::Code opcode = mir->dalvikInsn.opcode;
-    int flags = IsPseudoMirOp(opcode) ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
+    int flags = MIR::DecodedInstruction::IsPseudoMirOp(opcode) ?
+                  0 : Instruction::FlagsOf(mir->dalvikInsn.opcode);
     if ((flags & Instruction::kInvoke) &&
         (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) {
       DCHECK_EQ(next, 0);
diff --git a/compiler/driver/compiler_callbacks_impl.h b/compiler/driver/compiler_callbacks_impl.h
deleted file mode 100644
index 92adb20..0000000
--- a/compiler/driver/compiler_callbacks_impl.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DRIVER_COMPILER_CALLBACKS_IMPL_H_
-#define ART_COMPILER_DRIVER_COMPILER_CALLBACKS_IMPL_H_
-
-#include "compiler_callbacks.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "verifier/method_verifier-inl.h"
-
-namespace art {
-
-class CompilerCallbacksImpl FINAL : public CompilerCallbacks {
-  public:
-    CompilerCallbacksImpl(VerificationResults* verification_results,
-                          DexFileToMethodInlinerMap* method_inliner_map)
-        : verification_results_(verification_results),
-          method_inliner_map_(method_inliner_map) {
-      CHECK(verification_results != nullptr);
-      CHECK(method_inliner_map != nullptr);
-    }
-
-    ~CompilerCallbacksImpl() { }
-
-    bool MethodVerified(verifier::MethodVerifier* verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
-    void ClassRejected(ClassReference ref) OVERRIDE {
-      verification_results_->AddRejectedClass(ref);
-    }
-
-  private:
-    VerificationResults* const verification_results_;
-    DexFileToMethodInlinerMap* const method_inliner_map_;
-};
-
-inline bool CompilerCallbacksImpl::MethodVerified(verifier::MethodVerifier* verifier) {
-  bool result = verification_results_->ProcessVerifiedMethod(verifier);
-  if (result) {
-    MethodReference ref = verifier->GetMethodReference();
-    method_inliner_map_->GetMethodInliner(ref.dex_file)
-        ->AnalyseMethodCode(verifier);
-  }
-  return result;
-}
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DRIVER_COMPILER_CALLBACKS_IMPL_H_
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 324f717..89295f2 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -18,13 +18,12 @@
 #define ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_
 
 #include "compiler_driver.h"
+
 #include "dex/compiler_ir.h"
-#include "mirror/art_field.h"
+#include "field_helper.h"
 #include "mirror/art_field-inl.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class_loader.h"
-#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
@@ -42,7 +41,7 @@
 }
 
 inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
-    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
@@ -59,7 +58,7 @@
 }
 
 inline mirror::ArtField* CompilerDriver::ResolveField(
-    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t field_idx, bool is_static) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
@@ -95,14 +94,13 @@
 
 inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    mirror::ArtField* resolved_field, uint16_t field_idx, MemberOffset* field_offset) {
+    mirror::ArtField* resolved_field, uint16_t field_idx) {
   DCHECK(!resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
   bool fast_get = referrer_class != nullptr &&
       referrer_class->CanAccessResolvedField(fields_class, resolved_field,
                                              dex_cache, field_idx);
   bool fast_put = fast_get && (!resolved_field->IsFinal() || fields_class == referrer_class);
-  *field_offset = fast_get ? resolved_field->GetOffset() : MemberOffset(0u);
   return std::make_pair(fast_get, fast_put);
 }
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9bf5135..9e88c8d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -25,6 +25,7 @@
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
+#include "compiled_class.h"
 #include "compiler.h"
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
@@ -34,7 +35,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "driver/compiler_options.h"
 #include "jni_internal.h"
-#include "object_utils.h"
+#include "object_lock.h"
 #include "profiler.h"
 #include "runtime.h"
 #include "gc/accounting/card_table-inl.h"
@@ -989,10 +990,10 @@
   stats_->ProcessedInvoke(invoke_type, flags);
 }
 
-bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                              bool is_put, MemberOffset* field_offset,
-                                              bool* is_volatile) {
-  ScopedObjectAccess soa(Thread::Current());
+mirror::ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
+                                                           const DexCompilationUnit* mUnit,
+                                                           bool is_put,
+                                                           const ScopedObjectAccess& soa) {
   // Try to resolve the field and compiling method's class.
   mirror::ArtField* resolved_field;
   mirror::Class* referrer_class;
@@ -1010,20 +1011,34 @@
     resolved_field = resolved_field_handle.Get();
     dex_cache = dex_cache_handle.Get();
   }
-  bool result = false;
+  bool can_link = false;
   if (resolved_field != nullptr && referrer_class != nullptr) {
-    *is_volatile = IsFieldVolatile(resolved_field);
     std::pair<bool, bool> fast_path = IsFastInstanceField(
-        dex_cache, referrer_class, resolved_field, field_idx, field_offset);
-    result = is_put ? fast_path.second : fast_path.first;
+        dex_cache, referrer_class, resolved_field, field_idx);
+    can_link = is_put ? fast_path.second : fast_path.first;
   }
-  if (!result) {
+  ProcessedInstanceField(can_link);
+  return can_link ? resolved_field : nullptr;
+}
+
+bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
+                                              bool is_put, MemberOffset* field_offset,
+                                              bool* is_volatile) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ArtField> resolved_field =
+      hs.NewHandle(ComputeInstanceFieldInfo(field_idx, mUnit, is_put, soa));
+
+  if (resolved_field.Get() == nullptr) {
     // Conservative defaults.
     *is_volatile = true;
     *field_offset = MemberOffset(static_cast<size_t>(-1));
+    return false;
+  } else {
+    *is_volatile = resolved_field->IsVolatile();
+    *field_offset = resolved_field->GetOffset();
+    return true;
   }
-  ProcessedInstanceField(result);
-  return result;
 }
 
 bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
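
The new overload returns the resolved ArtField* (or nullptr) under a caller-provided ScopedObjectAccess, so callers wrap it in a handle before touching other runtime state; BuildFieldAccess in compiler/optimizing/builder.cc below does exactly this. A condensed sketch, where driver, unit, and field_idx stand for a CompilerDriver*, DexCompilationUnit*, and field index in scope:

    ScopedObjectAccess soa(Thread::Current());
    StackHandleScope<1> hs(soa.Self());
    Handle<mirror::ArtField> field(hs.NewHandle(
        driver->ComputeInstanceFieldInfo(field_idx, unit, /* is_put */ false, soa)));
    if (field.Get() != nullptr && !field->IsVolatile()) {
      MemberOffset offset = field->GetOffset();  // Safe to cache for codegen.
    }
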
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9903421..6dae398 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -24,7 +24,6 @@
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "class_reference.h"
-#include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "dex_file.h"
@@ -32,6 +31,7 @@
 #include "instruction_set.h"
 #include "invoke_type.h"
 #include "method_reference.h"
+#include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
 #include "profiler.h"
 #include "runtime.h"
@@ -46,6 +46,7 @@
 class MethodVerifier;
 }  // namespace verifier
 
+class CompiledClass;
 class CompilerOptions;
 class DexCompilationUnit;
 class DexFileToMethodInlinerMap;
@@ -221,14 +222,14 @@
 
   // Resolve compiling method's class. Returns nullptr on failure.
   mirror::Class* ResolveCompilingMethodsClass(
-      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a field. Returns nullptr on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
   mirror::ArtField* ResolveField(
-      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t field_idx, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -244,7 +245,7 @@
   // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
   std::pair<bool, bool> IsFastInstanceField(
       mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      mirror::ArtField* resolved_field, uint16_t field_idx, MemberOffset* field_offset)
+      mirror::ArtField* resolved_field, uint16_t field_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Can we fast-path an SGET/SPUT access to a static field? If yes, compute the field offset,
@@ -297,6 +298,13 @@
                                 MemberOffset* field_offset, bool* is_volatile)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
+  mirror::ArtField* ComputeInstanceFieldInfo(uint32_t field_idx,
+                                             const DexCompilationUnit* mUnit,
+                                             bool is_put,
+                                             const ScopedObjectAccess& soa)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Can we fastpath static field access? Computes field's offset, volatility and whether the
   // field is within the referrer (which can avoid checking class initialization).
   bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 5325a68..9ae9bd4 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -25,12 +25,13 @@
 #include "dex_file.h"
 #include "gc/heap.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/class.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 92b2fee..c0f91d16 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -56,9 +56,9 @@
     include_patch_information_(kDefaultIncludePatchInformation),
     top_k_profile_threshold_(kDefaultTopKProfileThreshold),
     include_debug_symbols_(kDefaultIncludeDebugSymbols),
-    explicit_null_checks_(true),
-    explicit_so_checks_(true),
-    explicit_suspend_checks_(true)
+    implicit_null_checks_(false),
+    implicit_so_checks_(false),
+    implicit_suspend_checks_(false)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -74,9 +74,9 @@
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool include_debug_symbols,
-                  bool explicit_null_checks,
-                  bool explicit_so_checks,
-                  bool explicit_suspend_checks
+                  bool implicit_null_checks,
+                  bool implicit_so_checks,
+                  bool implicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -91,9 +91,9 @@
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     include_debug_symbols_(include_debug_symbols),
-    explicit_null_checks_(explicit_null_checks),
-    explicit_so_checks_(explicit_so_checks),
-    explicit_suspend_checks_(explicit_suspend_checks)
+    implicit_null_checks_(implicit_null_checks),
+    implicit_so_checks_(implicit_so_checks),
+    implicit_suspend_checks_(implicit_suspend_checks)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -160,28 +160,28 @@
     return include_debug_symbols_;
   }
 
-  bool GetExplicitNullChecks() const {
-    return explicit_null_checks_;
+  bool GetImplicitNullChecks() const {
+    return implicit_null_checks_;
   }
 
-  void SetExplicitNullChecks(bool new_val) {
-    explicit_null_checks_ = new_val;
+  void SetImplicitNullChecks(bool new_val) {
+    implicit_null_checks_ = new_val;
   }
 
-  bool GetExplicitStackOverflowChecks() const {
-    return explicit_so_checks_;
+  bool GetImplicitStackOverflowChecks() const {
+    return implicit_so_checks_;
   }
 
-  void SetExplicitStackOverflowChecks(bool new_val) {
-    explicit_so_checks_ = new_val;
+  void SetImplicitStackOverflowChecks(bool new_val) {
+    implicit_so_checks_ = new_val;
   }
 
-  bool GetExplicitSuspendChecks() const {
-    return explicit_suspend_checks_;
+  bool GetImplicitSuspendChecks() const {
+    return implicit_suspend_checks_;
   }
 
-  void SetExplicitSuspendChecks(bool new_val) {
-    explicit_suspend_checks_ = new_val;
+  void SetImplicitSuspendChecks(bool new_val) {
+    implicit_suspend_checks_ = new_val;
   }
 
 #ifdef ART_SEA_IR_MODE
@@ -208,9 +208,9 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool include_debug_symbols_;
-  bool explicit_null_checks_;
-  bool explicit_so_checks_;
-  bool explicit_suspend_checks_;
+  bool implicit_null_checks_;
+  bool implicit_so_checks_;
+  bool implicit_suspend_checks_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
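
Note the polarity flip: the removed explicit_* members defaulted to true, and the new implicit_* members default to false, so the default behavior (emit explicit checks) is unchanged. A one-line sketch of how a backend would consume the new getters (function name illustrative):

    // Explicit checks are required exactly when implicit ones are disabled.
    bool NeedsExplicitNullCheck(const CompilerOptions& options) {
      return !options.GetImplicitNullChecks();
    }
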
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index e637cfb..e479322 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -16,8 +16,10 @@
 
 #include "elf_file.h"
 
+#include "base/stringprintf.h"
 #include "common_compiler_test.h"
 #include "oat.h"
+#include "utils.h"
 
 namespace art {
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index d52ec0a..fe4fcd4 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -20,13 +20,15 @@
 #include <string>
 #include <vector>
 
+#include "base/unix_file/fd_file.h"
 #include "common_compiler_test.h"
-#include "compiler/elf_fixup.h"
-#include "compiler/image_writer.h"
-#include "compiler/oat_writer.h"
+#include "elf_fixup.h"
 #include "gc/space/image_space.h"
+#include "image_writer.h"
 #include "lock_word.h"
 #include "mirror/object-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
 #include "vector_output_stream.h"
@@ -77,8 +79,9 @@
 
       t.NewTiming("WriteElf");
       ScopedObjectAccess soa(Thread::Current());
-      OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0, 0, "", compiler_driver_.get(), &timings);
+      SafeMap<std::string, std::string> key_value_store;
+      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, compiler_driver_.get(), &timings,
+                           &key_value_store);
       bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
@@ -134,7 +137,7 @@
   // Remove the reservation of the memory for use to load the image.
   UnreserveImageSpace();
 
-  Runtime::Options options;
+  RuntimeOptions options;
   std::string image("-Ximage:");
   image.append(image_location.GetFilename());
   options.push_back(std::make_pair(image.c_str(), reinterpret_cast<void*>(NULL)));
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index acfa607..8ef2964 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -51,7 +51,6 @@
 #include "mirror/string-inl.h"
 #include "oat.h"
 #include "oat_file.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
@@ -630,11 +629,33 @@
         mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent()));
   }
 
- private:
+ protected:
   ImageWriter* const image_writer_;
   mirror::Object* const copy_;
 };
 
+class FixupClassVisitor FINAL : public FixupVisitor {
+ public:
+  FixupClassVisitor(ImageWriter* image_writer, Object* copy) : FixupVisitor(image_writer, copy) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    DCHECK(obj->IsClass());
+    FixupVisitor::operator()(obj, offset, false);
+
+    if (offset.Uint32Value() < mirror::Class::EmbeddedVTableOffset().Uint32Value()) {
+      return;
+    }
+  }
+
+  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    LOG(FATAL) << "Reference not expected here.";
+  }
+};
+
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
@@ -646,13 +667,68 @@
       DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
     }
   }
-  FixupVisitor visitor(this, copy);
-  orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  if (orig->IsClass() && orig->AsClass()->ShouldHaveEmbeddedImtAndVTable()) {
+    FixupClassVisitor visitor(this, copy);
+    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  } else {
+    FixupVisitor visitor(this, copy);
+    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  }
   if (orig->IsArtMethod<kVerifyNone>()) {
     FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
   }
 }
 
+const byte* ImageWriter::GetQuickCode(mirror::ArtMethod* method, bool* quick_is_interpreted) {
+  DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() &&
+         !method->IsAbstract()) << PrettyMethod(method);
+
+  // Use original code if it exists. Otherwise, set the code pointer to the resolution
+  // trampoline.
+
+  // Quick entrypoint:
+  const byte* quick_code = GetOatAddress(method->GetQuickOatCodeOffset());
+  *quick_is_interpreted = false;
+  if (quick_code != nullptr &&
+      (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) {
+    // We have code for a non-static or initialized method, just use the code.
+  } else if (quick_code == nullptr && method->IsNative() &&
+      (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
+    // Non-static or initialized native method missing compiled code, use generic JNI version.
+    quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
+  } else if (quick_code == nullptr && !method->IsNative()) {
+    // We don't have code at all for a non-native method, use the interpreter.
+    quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+    *quick_is_interpreted = true;
+  } else {
+    CHECK(!method->GetDeclaringClass()->IsInitialized());
+    // We have code for a static method, but need to go through the resolution stub for class
+    // initialization.
+    quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
+  }
+  return quick_code;
+}
+
+const byte* ImageWriter::GetQuickEntryPoint(mirror::ArtMethod* method) {
+  // Calculate the quick entry point following the same logic as FixupMethod() below.
+  // The resolution method has a special trampoline to call.
+  if (UNLIKELY(method == Runtime::Current()->GetResolutionMethod())) {
+    return GetOatAddress(quick_resolution_trampoline_offset_);
+  } else if (UNLIKELY(method == Runtime::Current()->GetImtConflictMethod())) {
+    return GetOatAddress(quick_imt_conflict_trampoline_offset_);
+  } else {
+    // We assume all methods have code. If they don't currently, then we set them to use the
+    // resolution trampoline. Abstract methods never have code and so we need to make sure their
+    // use results in an AbstractMethodError. We use the interpreter to achieve this.
+    if (UNLIKELY(method->IsAbstract())) {
+      return GetOatAddress(quick_to_interpreter_bridge_offset_);
+    } else {
+      bool quick_is_interpreted;
+      return GetQuickCode(method, &quick_is_interpreted);
+    }
+  }
+}
+
 void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -674,29 +750,8 @@
       copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
           (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
-      // Use original code if it exists. Otherwise, set the code pointer to the resolution
-      // trampoline.
-
-      // Quick entrypoint:
-      const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
-      bool quick_is_interpreted = false;
-      if (quick_code != nullptr &&
-          (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
-        // We have code for a non-static or initialized method, just use the code.
-      } else if (quick_code == nullptr && orig->IsNative() &&
-          (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
-        // Non-static or initialized native method missing compiled code, use generic JNI version.
-        quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
-      } else if (quick_code == nullptr && !orig->IsNative()) {
-        // We don't have code at all for a non-native method, use the interpreter.
-        quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
-        quick_is_interpreted = true;
-      } else {
-        CHECK(!orig->GetDeclaringClass()->IsInitialized());
-        // We have code for a static method, but need to go through the resolution stub for class
-        // initialization.
-        quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
-      }
+      bool quick_is_interpreted;
+      const byte* quick_code = GetQuickCode(orig, &quick_is_interpreted);
       copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
 
       // Portable entrypoint:
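
The extracted GetQuickCode is a four-way selection. A condensed restatement with the predicates made explicit (a simplified sketch, not the project code):

    // Mirrors the selection in GetQuickCode above; trampoline names as in
    // image_writer.cc, predicates flattened into booleans.
    enum QuickTarget { kCompiledCode, kGenericJniTrampoline,
                       kInterpreterBridge, kResolutionTrampoline };

    QuickTarget SelectQuickTarget(bool has_code, bool is_native, bool is_static,
                                  bool is_constructor, bool class_initialized) {
      bool usable_now = !is_static || is_constructor || class_initialized;
      if (has_code && usable_now) return kCompiledCode;
      if (!has_code && is_native && (!is_static || class_initialized))
        return kGenericJniTrampoline;
      if (!has_code && !is_native) return kInterpreterBridge;
      return kResolutionTrampoline;  // Static method of an uninitialized class.
    }
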
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 2bcb41e..cf5bc93 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -149,6 +149,13 @@
   void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Get quick code for non-resolution/imt_conflict/abstract method.
+  const byte* GetQuickCode(mirror::ArtMethod* method, bool* quick_is_interpreted)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const byte* GetQuickEntryPoint(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Patches references in OatFile to expect runtime addresses.
   void PatchOatCodeAndMethods(File* elf_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -192,6 +199,7 @@
   uint32_t quick_to_interpreter_bridge_offset_;
 
   friend class FixupVisitor;
+  friend class FixupClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index efc0b42..6db0c3b 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -19,6 +19,7 @@
 
 #include <vector>
 #include "handle_scope.h"
+#include "primitive.h"
 #include "thread.h"
 #include "utils/managed_register.h"
 
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 3bbb723..dec84f1 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -28,6 +28,7 @@
 #include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_internal.h"
+#include "mirror/art_method.h"
 #include "utils/assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 5febed2..525f05c 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -130,6 +130,10 @@
   callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
   callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
   callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
@@ -137,6 +141,10 @@
       1 << kNumberOfCpuRegisters;
 }
 
+uint32_t X86_64JniCallingConvention::FpSpillMask() const {
+  return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+}
+
 size_t X86_64JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
   size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
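
The new FpSpillMask must agree with the four XMM pushes added to callee_save_regs_. A quick self-contained check, assuming XMM12..XMM15 carry the enum values 12..15 (stand-ins; the real enum lives in the x86-64 constants header):

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-ins for the XMM register enum values.
    enum { XMM12 = 12, XMM13 = 13, XMM14 = 14, XMM15 = 15 };

    int main() {
      uint32_t fp_spill_mask = 1u << XMM12 | 1u << XMM13 | 1u << XMM14 | 1u << XMM15;
      assert(fp_spill_mask == 0xF000u);                // Bits 12-15 set.
      assert(__builtin_popcount(fp_spill_mask) == 4);  // Matches the four pushes.
      return 0;
    }
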
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 1ba5353..7a90c6e 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -61,9 +61,7 @@
   }
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index f8dca66..902f8dd 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -1648,7 +1648,7 @@
     field_value = SignOrZeroExtendCat1Types(field_value, field_jty);
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kLoadLoad);
+      irb_.CreateMemoryBarrier(art::kLoadAny);
     }
   }
 
@@ -1702,7 +1702,7 @@
     DCHECK_GE(field_offset.Int32Value(), 0);
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kStoreStore);
+      irb_.CreateMemoryBarrier(art::kAnyStore);
     }
 
     llvm::PointerType* field_type =
@@ -1717,7 +1717,7 @@
     irb_.CreateStore(new_value, field_addr, kTBAAHeapInstance, field_jty);
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kLoadLoad);
+      irb_.CreateMemoryBarrier(art::kAnyAny);
     }
 
     if (field_jty == kObject) {  // If put an object, mark the GC card table.
@@ -1870,7 +1870,7 @@
   phi->addIncoming(loaded_storage_object_addr, block_after_load_static);
 
   // Ensure load of status and load of value don't re-order.
-  irb_.CreateMemoryBarrier(art::kLoadLoad);
+  irb_.CreateMemoryBarrier(art::kLoadAny);
 
   return phi;
 }
@@ -1948,7 +1948,7 @@
     static_field_value = SignOrZeroExtendCat1Types(static_field_value, field_jty);
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kLoadLoad);
+      irb_.CreateMemoryBarrier(art::kLoadAny);
     }
   }
 
@@ -2025,7 +2025,7 @@
     }
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kStoreStore);
+      irb_.CreateMemoryBarrier(art::kAnyStore);
     }
 
     llvm::Value* static_field_offset_value = irb_.getPtrEquivInt(field_offset.Int32Value());
@@ -2038,7 +2038,7 @@
     irb_.CreateStore(new_value, static_field_addr, kTBAAHeapStatic, field_jty);
 
     if (is_volatile) {
-      irb_.CreateMemoryBarrier(art::kStoreLoad);
+      irb_.CreateMemoryBarrier(art::kAnyAny);
     }
 
     if (field_jty == kObject) {  // If put an object, mark the GC card table.
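
The barrier-kind renames move from pairwise orderings (kLoadLoad, kStoreStore, kStoreLoad) to one-sided kinds (kLoadAny, kAnyStore, kAnyAny): acquire after a volatile load, release before a volatile store, full fence after the store. A C++11 analogy of the same volatile-field lowering (an analogy only, not the ART implementation):

    #include <atomic>

    int data;
    std::atomic<int> flag;

    int VolatileLoad() {
      int v = flag.load(std::memory_order_acquire);  // kLoadAny: later accesses stay after.
      return v + data;
    }

    void VolatileStore(int v) {
      data = v;
      flag.store(v, std::memory_order_release);      // kAnyStore: earlier accesses stay before.
      std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny: full fence after the store.
    }
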
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 254faac..84f0b3c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -15,14 +15,18 @@
  */
 
 #include "common_compiler_test.h"
-#include "compiler/compiler.h"
-#include "compiler/oat_writer.h"
+#include "compiler.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/quick_compiler_callbacks.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "mirror/object-inl.h"
 #include "oat_file-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change.h"
 #include "vector_output_stream.h"
 
 namespace art {
@@ -94,8 +98,8 @@
   compiler_options_.reset(new CompilerOptions);
   verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  callbacks_.reset(new CompilerCallbacksImpl(verification_results_.get(),
-                                             method_inliner_map_.get()));
+  callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
+                                              method_inliner_map_.get()));
   timer_.reset(new CumulativeLogger("Compilation times"));
   compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
                                             verification_results_.get(),
@@ -111,12 +115,14 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ScratchFile tmp;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
   OatWriter oat_writer(class_linker->GetBootClassPath(),
                        42U,
                        4096U,
-                       "lue.art",
                        compiler_driver_.get(),
-                       &timings);
+                       &timings,
+                       &key_value_store);
   bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                             !kIsTargetBuild,
                                             class_linker->GetBootClassPath(),
@@ -136,7 +142,7 @@
   ASSERT_EQ(1U, oat_header.GetDexFileCount());  // core
   ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum());
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
-  ASSERT_EQ("lue.art", oat_header.GetImageFileLocation());
+  ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey)));
 
   const DexFile* dex_file = java_lang_dex_file_;
   uint32_t dex_file_checksum = dex_file->GetLocationChecksum();
@@ -180,7 +186,7 @@
   EXPECT_EQ(80U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(77 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -189,20 +195,20 @@
     std::vector<const DexFile*> dex_files;
     uint32_t image_file_location_oat_checksum = 0;
     uint32_t image_file_location_oat_begin = 0;
-    const std::string image_file_location;
-    OatHeader oat_header(instruction_set,
-                         instruction_set_features,
-                         &dex_files,
-                         image_file_location_oat_checksum,
-                         image_file_location_oat_begin,
-                         image_file_location);
-    ASSERT_TRUE(oat_header.IsValid());
+    OatHeader* oat_header = OatHeader::Create(instruction_set,
+                                              instruction_set_features,
+                                              &dex_files,
+                                              image_file_location_oat_checksum,
+                                              image_file_location_oat_begin,
+                                              nullptr);
+    ASSERT_NE(oat_header, nullptr);
+    ASSERT_TRUE(oat_header->IsValid());
 
-    char* magic = const_cast<char*>(oat_header.GetMagic());
+    char* magic = const_cast<char*>(oat_header->GetMagic());
     strcpy(magic, "");  // bad magic
-    ASSERT_FALSE(oat_header.IsValid());
+    ASSERT_FALSE(oat_header->IsValid());
     strcpy(magic, "oat\n000");  // bad version
-    ASSERT_FALSE(oat_header.IsValid());
+    ASSERT_FALSE(oat_header->IsValid());
 }
 
 }  // namespace art
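
The oat header is now variable-length (the fixed struct plus trailing key/value bytes), so it is created through the factory and queried by key, as the test above exercises. A condensed sketch using the calls from the test; instruction_set, instruction_set_features, dex_files, checksum, and oat_data_begin stand for values in scope, and the image path is illustrative only:

    SafeMap<std::string, std::string> key_value_store;
    key_value_store.Put(OatHeader::kImageLocationKey, "/system/framework/boot.art");
    OatHeader* header = OatHeader::Create(instruction_set, instruction_set_features,
                                          &dex_files, checksum, oat_data_begin,
                                          &key_value_store);
    CHECK(header->IsValid());
    const char* image = header->GetStoreValueByKey(OatHeader::kImageLocationKey);
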
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4b6d501..63a3c8c 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -22,6 +22,7 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
+#include "compiled_class.h"
 #include "dex_file-inl.h"
 #include "dex/verification_results.h"
 #include "gc/space/space.h"
@@ -49,19 +50,19 @@
 OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_oat_checksum,
                      uintptr_t image_file_location_oat_begin,
-                     const std::string& image_file_location,
                      const CompilerDriver* compiler,
-                     TimingLogger* timings)
+                     TimingLogger* timings,
+                     SafeMap<std::string, std::string>* key_value_store)
   : compiler_driver_(compiler),
     dex_files_(&dex_files),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
     image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_file_location_(image_file_location),
+    key_value_store_(key_value_store),
     oat_header_(NULL),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
     size_oat_header_(0),
-    size_oat_header_image_file_location_(0),
+    size_oat_header_key_value_store_(0),
     size_dex_file_(0),
     size_interpreter_to_interpreter_bridge_(0),
     size_interpreter_to_compiled_code_bridge_(0),
@@ -89,6 +90,8 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0) {
+  CHECK(key_value_store != nullptr);
+
   size_t offset;
   {
     TimingLogger::ScopedTiming split("InitOatHeader", timings);
@@ -121,7 +124,8 @@
   size_ = offset;
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
-  CHECK(image_file_location.empty() == compiler->IsImage());
+  CHECK_EQ(compiler->IsImage(),
+           key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
 }
 
 OatWriter::~OatWriter() {
@@ -716,16 +720,14 @@
 }
 
 size_t OatWriter::InitOatHeader() {
-  // create the OatHeader
-  oat_header_ = new OatHeader(compiler_driver_->GetInstructionSet(),
-                              compiler_driver_->GetInstructionSetFeatures(),
-                              dex_files_,
-                              image_file_location_oat_checksum_,
-                              image_file_location_oat_begin_,
-                              image_file_location_);
-  size_t offset = sizeof(*oat_header_);
-  offset += image_file_location_.size();
-  return offset;
+  oat_header_ = OatHeader::Create(compiler_driver_->GetInstructionSet(),
+                                  compiler_driver_->GetInstructionSetFeatures(),
+                                  dex_files_,
+                                  image_file_location_oat_checksum_,
+                                  image_file_location_oat_begin_,
+                                  key_value_store_);
+
+  return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
@@ -864,17 +866,13 @@
 bool OatWriter::Write(OutputStream* out) {
   const size_t file_offset = out->Seek(0, kSeekCurrent);
 
-  if (!out->WriteFully(oat_header_, sizeof(*oat_header_))) {
+  size_t header_size = oat_header_->GetHeaderSize();
+  if (!out->WriteFully(oat_header_, header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  size_oat_header_ += sizeof(*oat_header_);
-
-  if (!out->WriteFully(image_file_location_.data(), image_file_location_.size())) {
-    PLOG(ERROR) << "Failed to write oat header image file location to " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_image_file_location_ += image_file_location_.size();
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
 
   if (!WriteTables(out, file_offset)) {
     LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
@@ -909,7 +907,7 @@
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
     DO_STAT(size_oat_header_);
-    DO_STAT(size_oat_header_image_file_location_);
+    DO_STAT(size_oat_header_key_value_store_);
     DO_STAT(size_dex_file_);
     DO_STAT(size_interpreter_to_interpreter_bridge_);
     DO_STAT(size_interpreter_to_compiled_code_bridge_);
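
The CHECK_EQ at the end of the constructor encodes the new invariant: a boot-image compile carries no image-location entry, while an app compile must carry one. Stated as a sketch against the SafeMap API used above (the helper is hypothetical):

    // Invariant from the OatWriter constructor above:
    //   compiling the boot image  <=>  no kImageLocationKey in the store.
    bool ImageLocationConsistent(const CompilerDriver* compiler,
                                 const SafeMap<std::string, std::string>& kvs) {
      bool has_location = kvs.find(OatHeader::kImageLocationKey) != kvs.end();
      return compiler->IsImage() == !has_location;
    }
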
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index dbecb95..3d34956 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -79,9 +79,9 @@
   OatWriter(const std::vector<const DexFile*>& dex_files,
             uint32_t image_file_location_oat_checksum,
             uintptr_t image_file_location_oat_begin,
-            const std::string& image_file_location,
             const CompilerDriver* compiler,
-            TimingLogger* timings);
+            TimingLogger* timings,
+            SafeMap<std::string, std::string>* key_value_store);
 
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
@@ -253,9 +253,9 @@
   // dependencies on the image.
   uint32_t image_file_location_oat_checksum_;
   uintptr_t image_file_location_oat_begin_;
-  std::string image_file_location_;
 
   // data to write
+  SafeMap<std::string, std::string>* key_value_store_;
   OatHeader* oat_header_;
   std::vector<OatDexFile*> oat_dex_files_;
   std::vector<OatClass*> oat_classes_;
@@ -274,7 +274,7 @@
   uint32_t size_dex_file_alignment_;
   uint32_t size_executable_offset_alignment_;
   uint32_t size_oat_header_;
-  uint32_t size_oat_header_image_file_location_;
+  uint32_t size_oat_header_key_value_store_;
   uint32_t size_dex_file_;
   uint32_t size_interpreter_to_interpreter_bridge_;
   uint32_t size_interpreter_to_compiled_code_bridge_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index cc995f7..f594129 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -15,16 +15,29 @@
  * limitations under the License.
  */
 
+#include "builder.h"
+
+#include "class_linker.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
-#include "builder.h"
+#include "driver/compiler_driver-inl.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
 
 namespace art {
 
+static bool IsTypeSupported(Primitive::Type type) {
+  return type != Primitive::kPrimFloat && type != Primitive::kPrimDouble;
+}
+
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.SetSize(count);
@@ -93,7 +106,7 @@
 }
 
 template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset) {
+void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_offset) {
   HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(first, second);
@@ -110,7 +123,7 @@
 }
 
 template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, int32_t dex_offset) {
+void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_offset) {
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(value, GetIntConstant(0));
   current_block_->AddInstruction(comparison);
@@ -305,28 +318,23 @@
   uint32_t argument_index = start_index;
   for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    switch (type) {
-      case Primitive::kPrimFloat:
-      case Primitive::kPrimDouble:
-        return false;
-
-      default: {
-        if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) {
-          LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
-                       << " at " << dex_offset;
-          // We do not implement non sequential register pair.
-          return false;
-        }
-        HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
-        invoke->SetArgumentAt(argument_index, arg);
-        if (type == Primitive::kPrimLong) {
-          i++;
-        }
-      }
+    if (!IsTypeSupported(type)) {
+      return false;
+    }
+    if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) {
+      LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
+                   << " at " << dex_offset;
+      // We do not implement non-sequential register pairs.
+      return false;
+    }
+    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+    invoke->SetArgumentAt(argument_index, arg);
+    if (type == Primitive::kPrimLong) {
+      i++;
     }
   }
 
-  if (return_type == Primitive::kPrimDouble || return_type == Primitive::kPrimFloat) {
+  if (!IsTypeSupported(return_type)) {
     return false;
   }
 
@@ -335,6 +343,84 @@
   return true;
 }
 
+/**
+ * Helper class to add HTemporary instructions. This class is used when
+ * converting a DEX instruction to multiple HInstructions, and where those
+ * instructions do not die at the following instruction, but instead span
+ * multiple instructions.
+ */
+class Temporaries : public ValueObject {
+ public:
+  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
+    graph_->UpdateNumberOfTemporaries(count_);
+  }
+
+  void Add(HInstruction* instruction) {
+    // We currently only support vreg size temps.
+    DCHECK(instruction->GetType() != Primitive::kPrimLong
+           && instruction->GetType() != Primitive::kPrimDouble);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    instruction->GetBlock()->AddInstruction(temp);
+    DCHECK(temp->GetPrevious() == instruction);
+  }
+
+ private:
+  HGraph* const graph_;
+
+  // The total number of temporaries that will be used.
+  const size_t count_;
+
+  // Current index in the temporary stack, updated by `Add`.
+  size_t index_;
+};
+
+bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction,
+                                     uint32_t dex_offset,
+                                     bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index = instruction.VRegC_22c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ArtField> resolved_field(hs.NewHandle(
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa)));
+
+  if (resolved_field.Get() == nullptr) {
+    return false;
+  }
+  if (resolved_field->IsVolatile()) {
+    return false;
+  }
+
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
+  if (!IsTypeSupported(field_type)) {
+    return false;
+  }
+
+  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
+  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset));
+  if (is_put) {
+    Temporaries temps(graph_, 1);
+    HInstruction* null_check = current_block_->GetLastInstruction();
+    // We need one temporary for the null check.
+    temps.Add(null_check);
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    current_block_->AddInstruction(new (arena_) HInstanceFieldSet(
+        null_check,
+        value,
+        resolved_field->GetOffset()));
+  } else {
+    current_block_->AddInstruction(new (arena_) HInstanceFieldGet(
+        current_block_->GetLastInstruction(),
+        field_type,
+        resolved_field->GetOffset()));
+
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
 bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
@@ -581,6 +667,32 @@
     case Instruction::NOP:
       break;
 
+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT: {
+      if (!BuildFieldAccess(instruction, dex_offset, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      if (!BuildFieldAccess(instruction, dex_offset, true)) {
+        return false;
+      }
+      break;
+    }
+
     default:
       return false;
   }
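
Temporaries exist because the null-checked receiver must outlive the next LoadLocal; without the HTemporary, the baseline allocator would consider it dead one instruction later. A condensed restatement of the IPUT path above, as it would appear inside HGraphBuilder:

    // Schematic IPUT lowering (mirrors BuildFieldAccess above).
    HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
    current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset));
    Temporaries temps(graph_, 1);                    // Reserve one frame slot.
    HInstruction* null_check = current_block_->GetLastInstruction();
    temps.Add(null_check);                           // Keep it live across the next load.
    HInstruction* value = LoadLocal(source_reg, field_type);
    current_block_->AddInstruction(
        new (arena_) HInstanceFieldSet(null_check, value, field_offset));
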
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index ee32ca8..f94b8e8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
 #include "dex_file.h"
+#include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "primitive.h"
 #include "utils/allocation.h"
@@ -32,7 +33,8 @@
  public:
   HGraphBuilder(ArenaAllocator* arena,
                 DexCompilationUnit* dex_compilation_unit = nullptr,
-                const DexFile* dex_file = nullptr)
+                const DexFile* dex_file = nullptr,
+                CompilerDriver* driver = nullptr)
       : arena_(arena),
         branch_targets_(arena, 0),
         locals_(arena, 0),
@@ -43,7 +45,8 @@
         constant0_(nullptr),
         constant1_(nullptr),
         dex_file_(dex_file),
-        dex_compilation_unit_(dex_compilation_unit) { }
+        dex_compilation_unit_(dex_compilation_unit),
+        compiler_driver_(driver) {}
 
   HGraph* BuildGraph(const DexFile::CodeItem& code);
 
@@ -84,11 +87,13 @@
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset);
-  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_offset);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_offset);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
+  bool BuildFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_put);
+
   // Builds an invocation node and returns whether the instruction is supported.
   bool BuildInvoke(const Instruction& instruction,
                    uint32_t dex_offset,
@@ -117,6 +122,7 @@
 
   const DexFile* const dex_file_;
   DexCompilationUnit* const dex_compilation_unit_;
+  CompilerDriver* const compiler_driver_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b8332ad..e0db0f1 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -38,7 +38,8 @@
 
   DCHECK_EQ(frame_size_, kUninitializedFrameSize);
   ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs()
-                   + GetGraph()->GetNumberOfVRegs()
+                   + GetGraph()->GetNumberOfLocalVRegs()
+                   + GetGraph()->GetNumberOfTemporaries()
                    + 1 /* filler */);
   GenerateFrameEntry();
 
@@ -54,6 +55,7 @@
       current->Accept(instruction_visitor);
     }
   }
+  GenerateSlowPaths();
 
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -79,6 +81,7 @@
       current->Accept(instruction_visitor);
     }
   }
+  GenerateSlowPaths();
 
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -86,6 +89,12 @@
   GetAssembler()->FinalizeInstructions(code);
 }
 
+void CodeGenerator::GenerateSlowPaths() {
+  for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) {
+    slow_paths_.Get(i)->EmitNativeCode(this);
+  }
+}
+
 size_t CodeGenerator::AllocateFreeRegisterInternal(
     bool* blocked_registers, size_t number_of_registers) const {
   for (size_t regno = 0; regno < number_of_registers; regno++) {
@@ -94,10 +103,42 @@
       return regno;
     }
   }
-  LOG(FATAL) << "Unreachable";
   return -1;
 }
 
+void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots) {
+  SetFrameSize(RoundUp(
+      number_of_spill_slots * kVRegSize
+      + kVRegSize  // Art method
+      + FrameEntrySpillSize(),
+      kStackAlignment));
+}
+
+Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
+  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
+  // Use the temporary region (right below the dex registers).
+  int32_t slot = GetFrameSize() - FrameEntrySpillSize()
+                                - kVRegSize  // filler
+                                - (number_of_locals * kVRegSize)
+                                - ((1 + temp->GetIndex()) * kVRegSize);
+  return Location::StackSlot(slot);
+}
+
+int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
+  uint16_t reg_number = local->GetRegNumber();
+  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
+  if (reg_number >= number_of_locals) {
+    // Local is a parameter of the method. It is stored in the caller's frame.
+    return GetFrameSize() + kVRegSize  // ART method
+                          + (reg_number - number_of_locals) * kVRegSize;
+  } else {
+    // Local is a temporary in this method. It is stored in this method's frame.
+    return GetFrameSize() - FrameEntrySpillSize()
+                          - kVRegSize  // filler.
+                          - (number_of_locals * kVRegSize)
+                          + (reg_number * kVRegSize);
+  }
+}
 
 void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
   LocationSummary* locations = instruction->GetLocations();
@@ -162,13 +203,6 @@
       locations->SetTempAt(i, loc);
     }
   }
-
-  // Make all registers available for the return value.
-  for (size_t i = 0, e = GetNumberOfRegisters(); i < e; ++i) {
-    blocked_registers_[i] = false;
-  }
-  SetupBlockedRegisters(blocked_registers_);
-
   Location result_location = locations->Out();
   if (result_location.IsUnallocated()) {
     switch (result_location.GetPolicy()) {
@@ -187,6 +221,12 @@
 
 void CodeGenerator::InitLocations(HInstruction* instruction) {
   if (instruction->GetLocations() == nullptr) {
+    if (instruction->IsTemporary()) {
+      HInstruction* previous = instruction->GetPrevious();
+      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
+      Move(previous, temp_location, instruction);
+      previous->GetLocations()->SetOut(temp_location);
+    }
     return;
   }
   AllocateRegistersLocally(instruction);
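
GetStackSlot and GetTemporaryLocation carve the frame as: [spilled entry registers][filler vreg][locals][temporaries][outs], with parameters addressed above the frame in the caller. A worked check of the arithmetic, with all numbers illustrative (64-byte frame, 12-byte entry spill, 4-byte vregs, 3 locals):

    #include <cassert>
    #include <cstdint>

    // Mirrors GetStackSlot/GetTemporaryLocation above with concrete constants.
    constexpr int32_t kFrameSize = 64, kEntrySpill = 12, kVReg = 4, kLocals = 3;

    int32_t LocalSlot(uint16_t reg) {
      if (reg >= kLocals) {  // Parameter: stored in the caller's frame.
        return kFrameSize + kVReg + (reg - kLocals) * kVReg;
      }
      return kFrameSize - kEntrySpill - kVReg - kLocals * kVReg + reg * kVReg;
    }

    int32_t TempSlot(int32_t index) {
      return kFrameSize - kEntrySpill - kVReg - kLocals * kVReg - (1 + index) * kVReg;
    }

    int main() {
      assert(LocalSlot(0) == 36 && LocalSlot(2) == 44);  // Locals pack upward.
      assert(LocalSlot(3) == 68);                        // First parameter, above the frame.
      assert(TempSlot(0) == 32);                         // Temps sit just below the locals.
      return 0;
    }
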
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ae2f030..18e3e5a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,12 +30,13 @@
 static size_t constexpr kVRegSize = 4;
 static size_t constexpr kUninitializedFrameSize = 0;
 
+class CodeGenerator;
 class DexCompilationUnit;
 
 class CodeAllocator {
  public:
-  CodeAllocator() { }
-  virtual ~CodeAllocator() { }
+  CodeAllocator() {}
+  virtual ~CodeAllocator() {}
 
   virtual uint8_t* Allocate(size_t size) = 0;
 
@@ -48,6 +49,23 @@
   uintptr_t native_pc;
 };
 
+class SlowPathCode : public ArenaObject {
+ public:
+  SlowPathCode() : entry_label_(), exit_label_() {}
+  virtual ~SlowPathCode() {}
+
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+
+ private:
+  Label entry_label_;
+  Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
+};
+
 class CodeGenerator : public ArenaObject {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -78,7 +96,10 @@
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
-  virtual void ComputeFrameSize(size_t number_of_spill_slots) = 0;
+  void ComputeFrameSize(size_t number_of_spill_slots);
+  virtual size_t FrameEntrySpillSize() const = 0;
+  int32_t GetStackSlot(HLocal* local) const;
+  Location GetTemporaryLocation(HTemporary* temp) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -99,6 +120,12 @@
     pc_infos_.Add(pc_info);
   }
 
+  void AddSlowPath(SlowPathCode* slow_path) {
+    slow_paths_.Add(slow_path);
+  }
+
+  void GenerateSlowPaths();
+
   void BuildMappingTable(std::vector<uint8_t>* vector) const;
   void BuildVMapTable(std::vector<uint8_t>* vector) const;
   void BuildNativeGCMap(
@@ -110,6 +137,7 @@
         graph_(graph),
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
+        slow_paths_(graph->GetArena(), 8),
         blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {}
   ~CodeGenerator() {}
 
@@ -138,6 +166,7 @@
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
   GrowableArray<PcInfo> pc_infos_;
+  GrowableArray<SlowPathCode*> slow_paths_;
 
   // Temporary data structure used when doing register allocation.
   bool* const blocked_registers_;
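
The slow-path protocol: a visitor allocates a SlowPathCode, registers it with AddSlowPath, branches to GetEntryLabel() on the exceptional condition, and GenerateSlowPaths() emits every registered body after the last basic block (as wired into the compile loops above). NullCheckSlowPathARM below is the first user; a generic sketch of a subclass, assuming only the interface declared here (the throw-stub call is elided):

    // Hypothetical slow path: calls a runtime stub, records the dex pc.
    class DivZeroCheckSlowPath FINAL : public SlowPathCode {
     public:
      explicit DivZeroCheckSlowPath(uint32_t dex_pc) : dex_pc_(dex_pc) {}

      virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
        codegen->GetAssembler()->Bind(GetEntryLabel());
        // ... emit the call to the ArithmeticException throw entrypoint ...
        codegen->RecordPcInfo(dex_pc_);
      }

     private:
      const uint32_t dex_pc_;
      DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPath);
    };
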
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d87c14b..73c2d48 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -24,8 +24,6 @@
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm/managed_register_arm.h"
 
-#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
-
 namespace art {
 
 arm::ArmManagedRegister Location::AsArm() const {
@@ -34,6 +32,27 @@
 
 namespace arm {
 
+#define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathARM : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathARM(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowNullPointer).Int32Value();
+    __ ldr(LR, Address(TR, offset));
+    __ blx(LR);
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
+};
+
+#undef __
+#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
@@ -63,7 +82,7 @@
   return EQ;        // Unreachable.
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;
 
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -80,6 +99,10 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this) {}
 
+size_t CodeGeneratorARM::FrameEntrySpillSize() const {
+  return kNumberOfPushedRegistersAtEntry * kArmWordSize;
+}
+
 static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
   return blocked_registers + kNumberOfAllocIds;
 }
@@ -88,12 +111,23 @@
                                                        bool* blocked_registers) const {
   switch (type) {
     case Primitive::kPrimLong: {
-      size_t reg = AllocateFreeRegisterInternal(
-          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs);
       ArmManagedRegister pair =
           ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
       blocked_registers[pair.AsRegisterPairLow()] = true;
       blocked_registers[pair.AsRegisterPairHigh()] = true;
+      // Block all other register pairs that share a register with `pair`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        ArmManagedRegister current =
+            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairLow() == pair.AsRegisterPairHigh()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return pair;
     }
 
@@ -103,7 +137,16 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters);
+      int reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters);
+      // Block all register pairs that contain `reg`.
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        ArmManagedRegister current =
+            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return ArmManagedRegister::FromCoreRegister(static_cast<Register>(reg));
     }
 
@@ -140,13 +183,12 @@
   blocked_registers[IP] = true;
 
   // TODO: We currently don't use Quick's callee saved registers.
+  // We always save and restore R6 and R7 to make sure we can use three
+  // register pairs for long operations.
   blocked_registers[R5] = true;
-  blocked_registers[R6] = true;
-  blocked_registers[R7] = true;
   blocked_registers[R8] = true;
   blocked_registers[R10] = true;
   blocked_registers[R11] = true;
-  blocked_register_pairs[R6_R7] = true;
 }
 
 size_t CodeGeneratorARM::GetNumberOfRegisters() const {
@@ -162,17 +204,9 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-void CodeGeneratorARM::ComputeFrameSize(size_t number_of_spill_slots) {
-  SetFrameSize(RoundUp(
-      number_of_spill_slots * kVRegSize
-      + kVRegSize  // Art method
-      + kNumberOfPushedRegistersAtEntry * kArmWordSize,
-      kStackAlignment));
-}
-
 void CodeGeneratorARM::GenerateFrameEntry() {
-  core_spill_mask_ |= (1 << LR);
-  __ PushList((1 << LR));
+  core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
+  __ PushList(1 << LR | 1 << R6 | 1 << R7);
 
   // The return PC has already been pushed on the stack.
   __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
@@ -181,30 +215,13 @@
 
 void CodeGeneratorARM::GenerateFrameExit() {
   __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
-  __ PopList((1 << PC));
+  __ PopList(1 << PC | 1 << R6 | 1 << R7);
 }
 
 void CodeGeneratorARM::Bind(Label* label) {
   __ Bind(label);
 }
 
-int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
-  uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
-  if (reg_number >= number_of_vregs - number_of_in_vregs) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kVRegSize  // ART method
-                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kArmWordSize)
-                          - kVRegSize  // filler.
-                          - (number_of_vregs * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
 Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
@@ -399,9 +416,7 @@
         LOG(FATAL) << "Unimplemented type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -448,7 +463,12 @@
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }
 
@@ -482,7 +502,9 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister());
+  }
   comp->SetLocations(locations);
 }
 
@@ -960,6 +982,147 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsArm().AsCoreRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreByte, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreHalfword, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreWord, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      ArmManagedRegister value = locations->InAt(1).AsArm();
+      __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow(), obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsArm().AsCoreRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadWord, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: support volatile.
+      ArmManagedRegister out = locations->Out().AsArm();
+      __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow(), obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmp(obj.AsArm().AsCoreRegister(), ShifterOperand(0));
+  }
+  __ b(slow_path->GetEntryLabel(), EQ);
+}
+
+void LocationsBuilderARM::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unreachable";
 }
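
Aside on the pair-blocking loops added above (the ARM and x86 variants share the same shape): once a single core register has been handed out, every register pair that contains it must be blocked, so that a later pair allocation cannot alias it. A minimal standalone sketch, using a toy register model rather than ART's ManagedRegister API:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    // Toy model (illustrative names, not ART's): registers R0..R7, with the
    // four even/odd pairs used for wide values.
    enum Register { R0, R1, R2, R3, R4, R5, R6, R7 };
    struct RegisterPair { Register low, high; };
    constexpr std::array<RegisterPair, 4> kPairs = {{
        {R0, R1}, {R2, R3}, {R4, R5}, {R6, R7}}};

    // After allocating `reg` as a single register, block every pair that
    // overlaps it.
    void BlockPairsContaining(Register reg, bool (&blocked_pairs)[4]) {
      for (std::size_t i = 0; i < kPairs.size(); ++i) {
        if (kPairs[i].low == reg || kPairs[i].high == reg) {
          blocked_pairs[i] = true;
        }
      }
    }

    int main() {
      bool blocked_pairs[4] = {};
      BlockPairsContaining(R6, blocked_pairs);
      std::printf("pair (R6,R7) blocked: %d\n", blocked_pairs[3]);  // 1
      return 0;
    }
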
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c46c1b1..1b5974f 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -126,7 +126,6 @@
   explicit CodeGeneratorARM(HGraph* graph);
   virtual ~CodeGeneratorARM() { }
 
-  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
@@ -136,6 +135,8 @@
     return kArmWordSize;
   }
 
+  virtual size_t FrameEntrySpillSize() const OVERRIDE;
+
   virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
@@ -153,7 +154,6 @@
       Primitive::Type type, bool* blocked_registers) const OVERRIDE;
   virtual size_t GetNumberOfRegisters() const OVERRIDE;
 
-  int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
   virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
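
With ComputeFrameSize() deleted from every backend, the shared CodeGenerator presumably derives the frame size from the per-backend FrameEntrySpillSize() hook introduced here. A sketch of that shape (the base-class wiring and constants below are assumptions; the real logic lives in the common code generator):

    #include <cstddef>
    #include <cstdio>

    namespace sketch {
    constexpr std::size_t kVRegSize = 4;
    constexpr std::size_t kStackAlignment = 16;

    constexpr std::size_t RoundUp(std::size_t x, std::size_t n) {
      return ((x + n - 1) / n) * n;
    }

    // The layout math is shared; only the number of bytes pushed by the
    // prologue remains backend-specific.
    struct CodeGenerator {
      virtual ~CodeGenerator() {}
      virtual std::size_t FrameEntrySpillSize() const = 0;
      std::size_t ComputeFrameSize(std::size_t number_of_spill_slots) const {
        return RoundUp(number_of_spill_slots * kVRegSize
                           + kVRegSize  // ART method
                           + FrameEntrySpillSize(),
                       kStackAlignment);
      }
    };

    struct CodeGeneratorARM : CodeGenerator {
      // LR, R6 and R7 are pushed on entry (see GenerateFrameEntry above).
      std::size_t FrameEntrySpillSize() const override { return 3 * 4; }
    };
    }  // namespace sketch

    int main() {
      sketch::CodeGeneratorARM arm;
      std::printf("frame size for 5 slots: %zu\n", arm.ComputeFrameSize(5));
      return 0;
    }
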
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 572d494..4e69a0c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -24,8 +24,6 @@
 #include "mirror/art_method.h"
 #include "thread.h"
 
-#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
-
 namespace art {
 
 x86::X86ManagedRegister Location::AsX86() const {
@@ -34,6 +32,26 @@
 
 namespace x86 {
 
+#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathX86 : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathX86(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer)));
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
+};
+
+#undef __
+#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
+
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
@@ -65,12 +83,8 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this) {}
 
-void CodeGeneratorX86::ComputeFrameSize(size_t number_of_spill_slots) {
-  SetFrameSize(RoundUp(
-      number_of_spill_slots * kVRegSize
-      + kVRegSize  // Art method
-      + kNumberOfPushedRegistersAtEntry * kX86WordSize,
-      kStackAlignment));
+size_t CodeGeneratorX86::FrameEntrySpillSize() const {
+  return kNumberOfPushedRegistersAtEntry * kX86WordSize;
 }
 
 static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
@@ -107,8 +121,18 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters);
-      return X86ManagedRegister::FromCpuRegister(static_cast<Register>(reg));
+      Register reg = static_cast<Register>(
+          AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters));
+      // Block all register pairs that contain `reg`.
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        X86ManagedRegister current =
+            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
+          blocked_register_pairs[i] = true;
+        }
+      }
+      return X86ManagedRegister::FromCpuRegister(reg);
     }
 
     case Primitive::kPrimFloat:
@@ -176,24 +200,6 @@
   __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
 }
 
-int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
-  uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
-  if (reg_number >= number_of_vregs - number_of_in_vregs) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kVRegSize  // ART method
-                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86WordSize)
-                          - kVRegSize  // filler.
-                          - (number_of_vregs * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
-
 Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
@@ -392,9 +398,7 @@
         LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -441,7 +445,12 @@
 
 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }
 
@@ -520,7 +529,9 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
-  locations->SetOut(Location::SameAsFirstInput());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
   comp->SetLocations(locations);
 }
 
@@ -915,7 +926,7 @@
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
   locations->SetOut(Location::RequiresRegister());
   compare->SetLocations(locations);
 }
@@ -928,11 +939,21 @@
       Label less, greater, done;
       Register output = locations->Out().AsX86().AsCpuRegister();
       X86ManagedRegister left = locations->InAt(0).AsX86();
-      X86ManagedRegister right = locations->InAt(1).AsX86();
-      __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh());
+      Location right = locations->InAt(1);
+      if (right.IsRegister()) {
+        __ cmpl(left.AsRegisterPairHigh(), right.AsX86().AsRegisterPairHigh());
+      } else {
+        DCHECK(right.IsDoubleStackSlot());
+        __ cmpl(left.AsRegisterPairHigh(), Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+      }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
-      __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow());
+      if (right.IsRegister()) {
+        __ cmpl(left.AsRegisterPairLow(), right.AsX86().AsRegisterPairLow());
+      } else {
+        DCHECK(right.IsDoubleStackSlot());
+        __ cmpl(left.AsRegisterPairLow(), Address(ESP, right.GetStackIndex()));
+      }
       __ movl(output, Immediate(0));
       __ j(kEqual, &done);
       __ j(kBelow, &less);  // Unsigned compare.
@@ -965,6 +986,158 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+  if (field_type == Primitive::kPrimBoolean || field_type == Primitive::kPrimByte) {
+    // Ensure the value is in a byte register.
+    locations->SetInAt(1, X86CpuLocation(EAX));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsX86().AsCpuRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      ByteRegister value = locations->InAt(1).AsX86().AsByteRegister();
+      __ movb(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movw(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      X86ManagedRegister value = locations->InAt(1).AsX86();
+      __ movl(Address(obj, offset), value.AsRegisterPairLow());
+      __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh());
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsX86().AsCpuRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movzxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movsxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movsxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movzxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movl(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: support volatile.
+      X86ManagedRegister out = locations->Out().AsX86();
+      __ movl(out.AsRegisterPairLow(), Address(obj, offset));
+      __ movl(out.AsRegisterPairHigh(), Address(obj, kX86WordSize + offset));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::Any());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmpl(obj.AsX86().AsCpuRegister(), Immediate(0));
+  } else {
+    DCHECK(locations->InAt(0).IsStackSlot());
+    __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
+  }
+  __ j(kEqual, slow_path->GetEntryLabel());
+}
+
+void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unreachable";
 }
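
The NeedsMaterialization() checks threaded through VisitIf/VisitCondition in all three backends encode one rule: a condition whose only user is the HIf immediately following it can compile to a bare compare-and-branch, with no output register. A standalone sketch of that decision on a toy IR (illustrative semantics, not ART's):

    #include <cstdio>
    #include <vector>

    // Toy IR node: `next` is the following instruction in the block,
    // `users` the instructions consuming this one's value.
    struct Instr {
      bool is_branch = false;
      Instr* next = nullptr;
      std::vector<Instr*> users;
    };

    // A condition must be materialized into a register unless its single
    // user is the branch that directly follows it.
    bool NeedsMaterialization(const Instr& cond) {
      if (cond.users.size() != 1) return true;
      Instr* user = cond.users[0];
      return !(user->is_branch && cond.next == user);
    }

    int main() {
      Instr cond, branch;
      branch.is_branch = true;
      cond.next = &branch;
      cond.users.push_back(&branch);
      std::printf("materialize: %d\n", NeedsMaterialization(cond));  // 0
      return 0;
    }
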
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8a8216a..d622d2a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -128,7 +128,6 @@
   explicit CodeGeneratorX86(HGraph* graph);
   virtual ~CodeGeneratorX86() { }
 
-  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
@@ -138,6 +137,8 @@
     return kX86WordSize;
   }
 
+  virtual size_t FrameEntrySpillSize() const OVERRIDE;
+
   virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
@@ -155,7 +156,6 @@
   virtual ManagedRegister AllocateFreeRegister(
       Primitive::Type type, bool* blocked_registers) const OVERRIDE;
 
-  int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
   virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dc1d616..e3ce5ce 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -25,8 +25,6 @@
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 
-#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
-
 namespace art {
 
 x86_64::X86_64ManagedRegister Location::AsX86_64() const {
@@ -35,6 +33,26 @@
 
 namespace x86_64 {
 
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathX86_64(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ gs()->call(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
+};
+
+#undef __
+#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
+
 inline Condition X86_64Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
@@ -73,6 +91,10 @@
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this) {}
 
+size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
+  return kNumberOfPushedRegistersAtEntry * kX86_64WordSize;
+}
+
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
       : HGraphVisitor(graph),
         assembler_(codegen->GetAssembler()),
@@ -119,16 +141,6 @@
   blocked_registers[R15] = true;
 }
 
-void CodeGeneratorX86_64::ComputeFrameSize(size_t number_of_spill_slots) {
-  // Add the current ART method to the frame size, the return PC, and the filler.
-  SetFrameSize(RoundUp(
-      number_of_spill_slots * kVRegSize
-      + kVRegSize  // filler
-      + kVRegSize  // Art method
-      + kNumberOfPushedRegistersAtEntry * kX86_64WordSize,
-      kStackAlignment));
-}
-
 void CodeGeneratorX86_64::GenerateFrameEntry() {
   // Create a fake register to mimic Quick.
   static const int kFakeReturnRegister = 16;
@@ -152,23 +164,6 @@
   __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
 }
 
-int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
-  uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
-  if (reg_number >= number_of_vregs - number_of_in_vregs) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kVRegSize  // ART method
-                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize)
-                          - kVRegSize
-                          - (number_of_vregs * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
 Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
@@ -265,9 +260,7 @@
         LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -311,7 +304,12 @@
 
 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }
 
@@ -385,7 +383,9 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister());
+  }
   comp->SetLocations(locations);
 }
 
@@ -827,6 +827,141 @@
   LOG(FATAL) << "Unimplemented";
 }
 
+void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
+  CpuRegister value = locations->InAt(1).AsX86_64().AsCpuRegister();
+  size_t offset = instruction->GetFieldOffset().SizeValue();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      __ movb(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      __ movw(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      __ movq(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
+  CpuRegister out = locations->Out().AsX86_64().AsCpuRegister();
+  size_t offset = instruction->GetFieldOffset().SizeValue();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      __ movzxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      __ movsxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      __ movsxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      __ movzxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      __ movl(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      __ movq(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::Any());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmpl(obj.AsX86_64().AsCpuRegister(), Immediate(0));
+  } else {
+    DCHECK(locations->InAt(0).IsStackSlot());
+    __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
+  }
+  __ j(kEqual, slow_path->GetEntryLabel());
+}
+
+void LocationsBuilderX86_64::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unimplemented";
 }
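
All three NullCheck implementations share the same slow-path pattern: the fast path emits a null compare plus a conditional branch to an out-of-line stub, and the stub calls the runtime entrypoint and records the dex pc. A toy sketch of that machinery (an assumed simplification of SlowPathCode/AddSlowPath, not the real API):

    #include <cstdint>
    #include <cstdio>
    #include <memory>
    #include <vector>

    // Out-of-line stub: emitted after the method body so the fast path
    // stays fall-through in the common case.
    struct SlowPath {
      explicit SlowPath(uint32_t dex_pc) : dex_pc(dex_pc) {}
      void EmitNativeCode() const {
        std::printf("  stub: call pThrowNullPointer, record pc %u\n", dex_pc);
      }
      uint32_t dex_pc;
    };

    struct CodeGen {
      void EmitNullCheck(uint32_t dex_pc) {
        slow_paths.push_back(std::make_unique<SlowPath>(dex_pc));
        std::printf("cmp obj, 0 ; jeq -> stub@%u\n", dex_pc);
      }
      void Finish() {
        // Slow paths are collected during code generation and emitted
        // once the main body is done.
        for (const auto& p : slow_paths) p->EmitNativeCode();
      }
      std::vector<std::unique_ptr<SlowPath>> slow_paths;
    };

    int main() {
      CodeGen cg;
      cg.EmitNullCheck(42);
      cg.Finish();
      return 0;
    }
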
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d347a4f..8283dda 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -125,7 +125,6 @@
   explicit CodeGeneratorX86_64(HGraph* graph);
   virtual ~CodeGeneratorX86_64() {}
 
-  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
@@ -135,6 +134,8 @@
     return kX86_64WordSize;
   }
 
+  virtual size_t FrameEntrySpillSize() const OVERRIDE;
+
   virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
@@ -151,7 +152,6 @@
     return &move_resolver_;
   }
 
-  int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
   virtual size_t GetNumberOfRegisters() const OVERRIDE {
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 987c5f2..a6e5ca9 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -182,9 +182,9 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the phi.
-  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
   range = interval->GetFirstRange();
-  ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(3)->GetLifetimePosition());
+  ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(2)->GetLifetimePosition());
   ASSERT_EQ(22u, range->GetStart());
   ASSERT_EQ(25u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 2d0bc39..1a4d745 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -152,32 +152,32 @@
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi, equal test)
+  // (constant0, constant4, constant5, phi)
   const char* expected =
     "Block 0\n"  // entry block
-    "  live in: (00000)\n"
-    "  live out: (11100)\n"
-    "  kill: (11100)\n"
+    "  live in: (0000)\n"
+    "  live out: (1110)\n"
+    "  kill: (1110)\n"
     "Block 1\n"  // block with if
-    "  live in: (11100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00010)\n"
+    "  live in: (1110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (01000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0100)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
     "Block 3\n"  // then block
-    "  live in: (00100)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0010)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00001)\n"
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0001)\n"
     "Block 5\n"  // exit block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n";
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -197,31 +197,34 @@
   //   a = 4;
   // }
   // return a;
+  //
+  // Bitsets are made of:
+  // (constant0, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
-    "  live in: (0000)\n"
-    "  live out: (1100)\n"
-    "  kill: (1100)\n"
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
     "Block 1\n"  // block with if
-    "  live in: (1100)\n"
-    "  live out: (1100)\n"
-    "  kill: (0010)\n"
+    "  live in: (110)\n"
+    "  live out: (110)\n"
+    "  kill: (000)\n"
     "Block 2\n"  // else block
-    "  live in: (0100)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n"
+    "  live in: (010)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
     "Block 3\n"  // return block
-    "  live in: (0000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0001)\n"
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (001)\n"
     "Block 4\n"  // exit block
-    "  live in: (0000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n"
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
     "Block 5\n"  // block to avoid critical edge. Predecessor is 1, successor is 3.
-    "  live in: (1000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n";
+    "  live in: (100)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -239,31 +242,33 @@
   //   a = 4;
   // }
   // return;
+  // Bitsets are made of:
+  // (constant0, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
-    "  live in: (0000)\n"
-    "  live out: (1100)\n"
-    "  kill: (1100)\n"
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
     "Block 1\n"  // pre header
-    "  live in: (1100)\n"
-    "  live out: (0100)\n"
-    "  kill: (0000)\n"
+    "  live in: (110)\n"
+    "  live out: (010)\n"
+    "  kill: (000)\n"
     "Block 2\n"  // loop header
-    "  live in: (0100)\n"
-    "  live out: (0100)\n"
-    "  kill: (0011)\n"
+    "  live in: (010)\n"
+    "  live out: (010)\n"
+    "  kill: (001)\n"
     "Block 3\n"  // back edge
-    "  live in: (0100)\n"
-    "  live out: (0100)\n"
-    "  kill: (0000)\n"
+    "  live in: (010)\n"
+    "  live out: (010)\n"
+    "  kill: (000)\n"
     "Block 4\n"  // return block
-    "  live in: (0000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n"
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
     "Block 5\n"  // exit block
-    "  live in: (0000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n";
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n";
 
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -283,31 +288,33 @@
   //   a = 4;
   // }
   // return 5;
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi)
   const char* expected =
     "Block 0\n"
-    "  live in: (00000)\n"
-    "  live out: (11100)\n"
-    "  kill: (11100)\n"
+    "  live in: (0000)\n"
+    "  live out: (1110)\n"
+    "  kill: (1110)\n"
     "Block 1\n"
-    "  live in: (11100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (1110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 2\n"  // loop header
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00011)\n"
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0001)\n"
     "Block 3\n"  // back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (00100)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0010)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
     "Block 5\n"  // exit block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n";
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
 
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -330,36 +337,36 @@
   // }
   // return a;
   // Bitsets are made of:
-  // (constant0, constant4, phi, equal test)
+  // (constant0, constant4, phi)
   const char* expected =
     "Block 0\n"
-    "  live in: (0000)\n"
-    "  live out: (1100)\n"
-    "  kill: (1100)\n"
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
     "Block 1\n"
-    "  live in: (1100)\n"
-    "  live out: (1100)\n"
-    "  kill: (0000)\n"
+    "  live in: (110)\n"
+    "  live out: (110)\n"
+    "  kill: (000)\n"
     "Block 2\n"  // loop header
-    "  live in: (0100)\n"
-    "  live out: (0110)\n"
-    "  kill: (0011)\n"
+    "  live in: (010)\n"
+    "  live out: (011)\n"
+    "  kill: (001)\n"
     "Block 3\n"  // back edge
-    "  live in: (0100)\n"
-    "  live out: (0100)\n"
-    "  kill: (0000)\n"
+    "  live in: (010)\n"
+    "  live out: (010)\n"
+    "  kill: (000)\n"
     "Block 4\n"  // pre loop header
-    "  live in: (1100)\n"
-    "  live out: (0100)\n"
-    "  kill: (0000)\n"
+    "  live in: (110)\n"
+    "  live out: (010)\n"
+    "  kill: (000)\n"
     "Block 5\n"  // return block
-    "  live in: (0010)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n"
+    "  live in: (001)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
     "Block 6\n"  // exit block
-    "  live in: (0000)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n";
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -377,45 +384,44 @@
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
-  // (constant0, constant4, constant5, equal in block 1, phi in block 8, phi in block 4,
-  //  equal in block 4)
+  // (constant0, constant4, constant5, phi in block 8, phi in block 4)
   const char* expected =
     "Block 0\n"
-    "  live in: (0000000)\n"
-    "  live out: (1110000)\n"
-    "  kill: (1110000)\n"
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
     "Block 1\n"
-    "  live in: (1110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0001000)\n"
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 2\n"
-    "  live in: (0100000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (01000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 3\n"
-    "  live in: (0010000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 4\n"  // loop header
-    "  live in: (0000000)\n"
-    "  live out: (0000010)\n"
-    "  kill: (0000011)\n"
+    "  live in: (00000)\n"
+    "  live out: (00001)\n"
+    "  kill: (00001)\n"
     "Block 5\n"  // back edge
-    "  live in: (0000010)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00001)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 6\n"  // return block
-    "  live in: (0000010)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00001)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 7\n"  // exit block
-    "  live in: (0000000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 8\n"  // synthesized pre header
-    "  live in: (0000000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000100)\n";
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00010)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -432,45 +438,44 @@
 
 TEST(LivenessTest, Loop6) {
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3,
-  //  phi in block 8)
+  // (constant0, constant4, constant5, phi in block 2, phi in block 8)
   const char* expected =
     "Block 0\n"
-    "  live in: (0000000)\n"
-    "  live out: (1110000)\n"
-    "  kill: (1110000)\n"
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
     "Block 1\n"
-    "  live in: (1110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 2\n"  // loop header
-    "  live in: (0110000)\n"
-    "  live out: (0111000)\n"
-    "  kill: (0001100)\n"
+    "  live in: (01100)\n"
+    "  live out: (01110)\n"
+    "  kill: (00010)\n"
     "Block 3\n"
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000010)\n"
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 4\n"  // original back edge
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 5\n"  // original back edge
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 6\n"  // return block
-    "  live in: (0001000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00010)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 7\n"  // exit block
-    "  live in: (0000000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 8\n"  // synthesized back edge
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000001)\n";
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00001)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -488,45 +493,44 @@
 
 TEST(LivenessTest, Loop7) {
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3,
-  //  phi in block 6)
+  // (constant0, constant4, constant5, phi in block 2, phi in block 6)
   const char* expected =
     "Block 0\n"
-    "  live in: (0000000)\n"
-    "  live out: (1110000)\n"
-    "  kill: (1110000)\n"
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
     "Block 1\n"
-    "  live in: (1110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 2\n"  // loop header
-    "  live in: (0110000)\n"
-    "  live out: (0111000)\n"
-    "  kill: (0001100)\n"
+    "  live in: (01100)\n"
+    "  live out: (01110)\n"
+    "  kill: (00010)\n"
     "Block 3\n"
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000010)\n"
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 4\n"  // loop exit
-    "  live in: (0010000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 5\n"  // back edge
-    "  live in: (0110000)\n"
-    "  live out: (0110000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
     "Block 6\n"  // return block
-    "  live in: (0000000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000001)\n"
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00001)\n"
     "Block 7\n"  // exit block
-    "  live in: (0000000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n"
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
     "Block 8\n"  // synthesized block to avoid critical edge.
-    "  live in: (0001000)\n"
-    "  live out: (0000000)\n"
-    "  kill: (0000000)\n";
+    "  live in: (00010)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
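
The expected strings in these tests spell out the standard backward dataflow sets, one bit per SSA value in the order each comment lists: live_in = use ∪ (live_out − kill), where kill is the set of values the block defines. A worked check of the entry block from the four-bit tests above, sketched with std::bitset:

    #include <bitset>
    #include <cstdio>

    int main() {
      // Bits are (constant0, constant4, constant5, phi), as in the tests.
      std::bitset<4> live_out("1110");  // the three constants flow out
      std::bitset<4> kill("1110");      // the entry block defines them
      std::bitset<4> use("0000");       // and uses nothing itself
      std::bitset<4> live_in = use | (live_out & ~kill);
      std::printf("live in: %s\n", live_in.to_string().c_str());  // 0000
      return 0;
    }
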
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 9292084..e87b044 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -18,6 +18,8 @@
 #define ART_COMPILER_OPTIMIZING_NODES_H_
 
 #include "locations.h"
+#include "offsets.h"
+#include "primitive.h"
 #include "utils/allocation.h"
 #include "utils/arena_bit_vector.h"
 #include "utils/growable_array.h"
@@ -75,6 +77,7 @@
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
+        number_of_temporaries_(0),
         current_instruction_id_(0) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
@@ -112,6 +115,14 @@
     maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_);
   }
 
+  void UpdateNumberOfTemporaries(size_t count) {
+    number_of_temporaries_ = std::max(count, number_of_temporaries_);
+  }
+
+  size_t GetNumberOfTemporaries() const {
+    return number_of_temporaries_;
+  }
+
   void SetNumberOfVRegs(uint16_t number_of_vregs) {
     number_of_vregs_ = number_of_vregs;
   }
@@ -128,6 +139,10 @@
     return number_of_in_vregs_;
   }
 
+  uint16_t GetNumberOfLocalVRegs() const {
+    return number_of_vregs_ - number_of_in_vregs_;
+  }
+
   const GrowableArray<HBasicBlock*>& GetReversePostOrder() const {
     return reverse_post_order_;
   }
@@ -163,6 +178,9 @@
   // The number of virtual registers used by parameters of this method.
   uint16_t number_of_in_vregs_;
 
+  // The number of temporaries that will be needed for the baseline compiler.
+  size_t number_of_temporaries_;
+
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int current_instruction_id_;
 
@@ -364,6 +382,8 @@
     }
   }
 
+  bool IsInLoop() const { return loop_information_ != nullptr; }
+
  // Returns whether this block dominates the block passed as argument.
   bool Dominates(HBasicBlock* block) const;
 
@@ -415,6 +435,10 @@
   M(StoreLocal)                                            \
   M(Sub)                                                   \
   M(Compare)                                               \
+  M(InstanceFieldGet)                                      \
+  M(InstanceFieldSet)                                      \
+  M(NullCheck)                                             \
+  M(Temporary)                                             \
 
 
 #define FORWARD_DECLARATION(type) class H##type;
@@ -468,6 +492,8 @@
 
   HBasicBlock* GetBlock() const { return block_; }
   void SetBlock(HBasicBlock* block) { block_ = block; }
+  bool IsInBlock() const { return block_ != nullptr; }
+  bool IsInLoop() const { return block_->IsInLoop(); }
 
   virtual size_t InputCount() const  = 0;
   virtual HInstruction* InputAt(size_t i) const = 0;
@@ -496,6 +522,7 @@
   HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; }
 
   bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; }
+  bool HasEnvironmentUses() const { return env_uses_ != nullptr; }
 
   size_t NumberOfUses() const {
     // TODO: Optimize this method if it is used outside of the HGraphVisualizer.
@@ -1225,7 +1252,8 @@
   HPhi(ArenaAllocator* arena, uint32_t reg_number, size_t number_of_inputs, Primitive::Type type)
       : inputs_(arena, number_of_inputs),
         reg_number_(reg_number),
-        type_(type) {
+        type_(type),
+        is_live_(false) {
     inputs_.SetSize(number_of_inputs);
   }
 
@@ -1243,17 +1271,113 @@
 
   uint32_t GetRegNumber() const { return reg_number_; }
 
+  void SetDead() { is_live_ = false; }
+  void SetLive() { is_live_ = true; }
+  bool IsDead() const { return !is_live_; }
+  bool IsLive() const { return is_live_; }
+
   DECLARE_INSTRUCTION(Phi);
 
  protected:
   GrowableArray<HInstruction*> inputs_;
   const uint32_t reg_number_;
   Primitive::Type type_;
+  bool is_live_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HPhi);
 };
 
+class HNullCheck : public HExpression<1> {
+ public:
+  HNullCheck(HInstruction* value, uint32_t dex_pc)
+      : HExpression(value->GetType()), dex_pc_(dex_pc) {
+    SetRawInputAt(0, value);
+  }
+
+  virtual bool NeedsEnvironment() const { return true; }
+
+  uint32_t GetDexPc() const { return dex_pc_; }
+
+  DECLARE_INSTRUCTION(NullCheck);
+
+ private:
+  const uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(HNullCheck);
+};
+
+class FieldInfo : public ValueObject {
+ public:
+  explicit FieldInfo(MemberOffset field_offset)
+      : field_offset_(field_offset) {}
+
+  MemberOffset GetFieldOffset() const { return field_offset_; }
+
+ private:
+  const MemberOffset field_offset_;
+};
+
+class HInstanceFieldGet : public HExpression<1> {
+ public:
+  HInstanceFieldGet(HInstruction* value,
+                    Primitive::Type field_type,
+                    MemberOffset field_offset)
+      : HExpression(field_type), field_info_(field_offset) {
+    SetRawInputAt(0, value);
+  }
+
+  MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
+
+  DECLARE_INSTRUCTION(InstanceFieldGet);
+
+ private:
+  const FieldInfo field_info_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet);
+};
+
+class HInstanceFieldSet : public HTemplateInstruction<2> {
+ public:
+  HInstanceFieldSet(HInstruction* object,
+                    HInstruction* value,
+                    MemberOffset field_offset)
+      : field_info_(field_offset) {
+    SetRawInputAt(0, object);
+    SetRawInputAt(1, value);
+  }
+
+  MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
+
+  DECLARE_INSTRUCTION(InstanceFieldSet);
+
+ private:
+  const FieldInfo field_info_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet);
+};
+
+/**
+ * Some DEX instructions are folded into multiple HInstructions that need
+ * to stay live until the last HInstruction. This class is used as a marker
+ * for the baseline compiler to ensure its preceding HInstruction stays
+ * live. `index` is the temporary number, used to compute the stack offset
+ * at which the instruction's value is stored.
+ */
+class HTemporary : public HTemplateInstruction<0> {
+ public:
+  explicit HTemporary(size_t index) : index_(index) {}
+
+  size_t GetIndex() const { return index_; }
+
+  DECLARE_INSTRUCTION(Temporary);
+
+ private:
+  const size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HTemporary);
+};
+
 class MoveOperands : public ArenaObject {
  public:
   MoveOperands(Location source, Location destination)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b4d7fff..b621e51 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 #include "graph_visualizer.h"
 #include "nodes.h"
 #include "register_allocator.h"
+#include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/arena_allocator.h"
 
@@ -101,7 +102,7 @@
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file);
+  HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file, GetCompilerDriver());
 
   HGraph* graph = builder.BuildGraph(*code_item);
   if (graph == nullptr) {
@@ -129,8 +130,11 @@
     graph->BuildDominatorTree();
     graph->TransformToSSA();
     visualizer.DumpGraph("ssa");
-
     graph->FindNaturalLoops();
+
+    SsaRedundantPhiElimination(graph).Run();
+    SsaDeadPhiElimination(graph).Run();
+
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
     visualizer.DumpGraph(kLivenessPassName);
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 7d4cd1a..e35ff56 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
 
 #include "base/macros.h"
+#include "primitive.h"
 #include "utils/growable_array.h"
 
 namespace art {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 50ea00f..fbdc0b9 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -204,9 +204,12 @@
       // All inputs of an instruction must be live.
       for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
         HInstruction* input = current->InputAt(i);
-        DCHECK(input->HasSsaIndex());
-        live_in->SetBit(input->GetSsaIndex());
-        input->GetLiveInterval()->AddUse(current, i, false);
+        // Some instructions 'inline' their inputs, that is, they do not need
+        // to be materialized.
+        if (input->HasSsaIndex()) {
+          live_in->SetBit(input->GetSsaIndex());
+          input->GetLiveInterval()->AddUse(current, i, false);
+        }
       }
 
       if (current->HasEnvironment()) {
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
new file mode 100644
index 0000000..13fa03f
--- /dev/null
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ssa_phi_elimination.h"
+
+namespace art {
+
+void SsaDeadPhiElimination::Run() {
+  // Add to the worklist phis referenced by non-phi instructions.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      if (phi->HasEnvironmentUses()) {
+        // TODO: Do we want to keep that phi alive?
+        continue;
+      }
+      for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
+        HUseListNode<HInstruction>* current = it.Current();
+        HInstruction* user = current->GetUser();
+        if (!user->IsPhi()) {
+          // A single non-phi user is enough to keep the phi live; phis
+          // start out dead (see the HPhi constructor).
+          worklist_.Add(phi);
+          phi->SetLive();
+          break;
+        }
+      }
+    }
+  }
+
+  // Process the worklist by propagating liveness to phi inputs.
+  while (!worklist_.IsEmpty()) {
+    HPhi* phi = worklist_.Pop();
+    for (HInputIterator it(phi); !it.Done(); it.Advance()) {
+      HInstruction* input = it.Current();
+      if (input->IsPhi() && input->AsPhi()->IsDead()) {
+        worklist_.Add(input->AsPhi());
+        input->AsPhi()->SetLive();
+      }
+    }
+  }
+
+  // Remove phis that are not live. Visit in post order to ensure
+  // we only remove phis with no users (dead phis might use dead phis).
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    HInstruction* current = block->GetFirstPhi();
+    HInstruction* next = nullptr;
+    while (current != nullptr) {
+      next = current->GetNext();
+      if (current->AsPhi()->IsDead()) {
+        block->RemovePhi(current->AsPhi());
+      }
+      current = next;
+    }
+  }
+}
+
+void SsaRedundantPhiElimination::Run() {
+  // Add all phis to the worklist.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      worklist_.Add(it.Current()->AsPhi());
+    }
+  }
+
+  while (!worklist_.IsEmpty()) {
+    HPhi* phi = worklist_.Pop();
+
+    // If the phi has already been processed, continue.
+    if (!phi->IsInBlock()) {
+      continue;
+    }
+
+    // Check whether all inputs of the phi are the same instruction.
+    HInstruction* candidate = phi->InputAt(0);
+    // A loop phi cannot have itself as its first input.
+    DCHECK_NE(phi, candidate);
+
+    for (size_t i = 1; i < phi->InputCount(); ++i) {
+      HInstruction* input = phi->InputAt(i);
+      // For a loop phi, if the input is the phi itself, the phi is still a
+      // candidate for elimination.
+      if (input != candidate && input != phi) {
+        candidate = nullptr;
+        break;
+      }
+    }
+
+    // If the inputs are not the same, continue.
+    if (candidate == nullptr) {
+      continue;
+    }
+
+    if (phi->IsInLoop()) {
+      // Because we are updating the users of this phi, other phis may become
+      // candidates for elimination if this phi is in a loop. Add the phis that
+      // use this phi to the worklist.
+      for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
+        HUseListNode<HInstruction>* current = it.Current();
+        HInstruction* user = current->GetUser();
+        if (user->IsPhi()) {
+          worklist_.Add(user->AsPhi());
+        }
+      }
+    }
+    phi->ReplaceWith(candidate);
+    phi->GetBlock()->RemovePhi(phi);
+  }
+}
+
+}  // namespace art
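The pass above is a straightforward backward worklist computation: liveness is seeded from phis with at least one non-phi user, then propagated through phi inputs until a fixed point. A self-contained model of the two phases, using a hypothetical minimal Phi type rather than ART's HPhi:

#include <vector>

// Hypothetical minimal phi node; ART's HPhi carries much more state.
struct Phi {
  bool live = false;
  bool has_non_phi_user = false;   // Seeded by the caller.
  std::vector<Phi*> phi_inputs;    // Inputs that are themselves phis.
};

void MarkLivePhis(const std::vector<Phi*>& phis) {
  std::vector<Phi*> worklist;
  // Phase 1: seed liveness from non-phi users.
  for (Phi* phi : phis) {
    if (phi->has_non_phi_user) {
      phi->live = true;
      worklist.push_back(phi);
    }
  }
  // Phase 2: propagate liveness to phi inputs until a fixed point.
  while (!worklist.empty()) {
    Phi* phi = worklist.back();
    worklist.pop_back();
    for (Phi* input : phi->phi_inputs) {
      if (!input->live) {          // Each phi enters the worklist at most once.
        input->live = true;
        worklist.push_back(input);
      }
    }
  }
}

Everything not marked live afterwards can be removed, which is what the post-order sweep at the end of Run() does.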
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
new file mode 100644
index 0000000..5274f09
--- /dev/null
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * Optimization phase that removes dead phis from the graph. Dead phis are unused
+ * phis, or phis only used by other phis.
+ */
+class SsaDeadPhiElimination : public ValueObject {
+ public:
+  explicit SsaDeadPhiElimination(HGraph* graph)
+      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+
+  void Run();
+
+ private:
+  HGraph* const graph_;
+  GrowableArray<HPhi*> worklist_;
+
+  static constexpr size_t kDefaultWorklistSize = 8;
+
+  DISALLOW_COPY_AND_ASSIGN(SsaDeadPhiElimination);
+};
+
+/**
+ * Removes redundant phis that may have been introduced when doing SSA conversion.
+ * For example, when entering a loop, we create phis for all live registers. These
+ * registers might be updated with the same value, or not updated at all. We can just
+ * replace the phi with the value when entering the loop.
+ */
+class SsaRedundantPhiElimination : public ValueObject {
+ public:
+  explicit SsaRedundantPhiElimination(HGraph* graph)
+      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+
+  void Run();
+
+ private:
+  HGraph* const graph_;
+  GrowableArray<HPhi*> worklist_;
+
+  static constexpr size_t kDefaultWorklistSize = 8;
+
+  DISALLOW_COPY_AND_ASSIGN(SsaRedundantPhiElimination);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
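As a concrete instance of the class comment: for code like `int x = f(); while (cond) { /* x never written */ }`, SSA conversion creates a loop-header phi `x2 = phi(x1, x2)`. Every input is either `x1` or the phi itself, so the phi is redundant and all its uses can be rewritten to use `x1`. A sketch of the candidate test, mirroring the search in Run() over a hypothetical minimal node type:

#include <cstddef>
#include <vector>

// Hypothetical minimal SSA node, standing in for ART's HInstruction/HPhi.
struct Node {
  std::vector<Node*> inputs;
};

// Returns the unique non-self input if the phi is redundant, else nullptr.
Node* RedundantPhiCandidate(Node* phi) {
  // A loop phi's first input comes from the preheader, never the phi itself.
  Node* candidate = phi->inputs[0];
  for (size_t i = 1; i < phi->inputs.size(); ++i) {
    Node* input = phi->inputs[i];
    if (input != candidate && input != phi) {
      return nullptr;  // Two distinct real inputs: the phi must stay.
    }
  }
  return candidate;
}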
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
index 5fa0ccb..315ca09 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/output_stream_test.cc
@@ -17,6 +17,7 @@
 #include "file_output_stream.h"
 #include "vector_output_stream.h"
 
+#include "base/unix_file/fd_file.h"
 #include "base/logging.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 009b227..5b97ba0 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -595,8 +595,7 @@
     // FIXME: Who sets the flags here?
     LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
   }
-  ___ Cmp(reg_x(in_reg.AsCoreRegister()), 0);
-  ___ B(&exit, COND_OP(EQ));
+  ___ Cbz(reg_x(in_reg.AsCoreRegister()), &exit);
   LoadFromOffset(out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0);
   ___ Bind(&exit);
 }
@@ -607,8 +606,7 @@
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
   LoadFromOffset(scratch.AsCoreRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
-  ___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
-  ___ B(current_exception->Entry(), COND_OP(NE));
+  ___ Cbnz(reg_x(scratch.AsCoreRegister()), current_exception->Entry());
 }
 
 void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
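Both arm64 hunks collapse a compare-against-zero followed by a conditional branch into a single Cbz/Cbnz, saving an instruction and, unlike Cmp, leaving the NZCV flags untouched. In C terms, the first hunk's emitted sequence roughly computes the usual null-in, null-out dereference guard; a sketch under that simplification:

#include <cstdint>

// Roughly what the emitted A64 sequence computes: load through 'in'
// unless it is null. The real code operates on registers, not pointers.
uintptr_t LoadOrNull(const uintptr_t* in) {
  if (in == nullptr) {  // Cbz in_reg, &exit
    return 0;
  }
  return *in;           // LoadFromOffset(out_reg, in_reg, 0)
}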
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 4d5d613..78738d8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1671,16 +1671,31 @@
                                  const std::vector<ManagedRegister>& spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    if (spill.IsCpuRegister()) {
+      pushq(spill.AsCpuRegister());
+      gpr_count++;
+    }
   }
   // return address then method on stack
-  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) +
-                                   sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
-                                   kFramePointerSize /*return address*/));
+  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+                          - (gpr_count * kFramePointerSize)
+                          - kFramePointerSize /*return address*/;
+  subq(CpuRegister(RSP), Immediate(rest_of_frame));
+  // Spill XMM registers just below the saved GPRs.
+  int64_t offset = rest_of_frame;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset -= sizeof(double);
+      movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+    }
+  }
 
   DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
-  subq(CpuRegister(RSP), Immediate(4));
+
   movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
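With this change only general-purpose registers are saved with pushq; floating-point registers are stored with movsd into the frame area reserved by the single subq. Under illustrative numbers (frame_size = 96, two GPR spills, one XMM spill), the arithmetic works out as in this sketch:

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative values; kFramePointerSize is 8 on x86-64.
  const int64_t kFramePointerSize = 8;
  const int64_t frame_size = 96;  // Must satisfy CHECK_ALIGNED(.., 16).
  const int gpr_count = 2;        // Saved by two pushq instructions.
  const int xmm_count = 1;

  // The call pushed the return address; the pushq instructions saved the GPRs.
  int64_t rest_of_frame =
      frame_size - gpr_count * kFramePointerSize - kFramePointerSize;
  std::printf("subq $%lld, %%rsp\n", static_cast<long long>(rest_of_frame));

  // XMM slots are assigned downward from the top of the reserved area.
  int64_t offset = rest_of_frame;
  for (int i = 0; i < xmm_count; ++i) {
    offset -= static_cast<int64_t>(sizeof(double));
    std::printf("movsd %%xmmN, %lld(%%rsp)\n", static_cast<long long>(offset));
  }
  // The 4-byte StackReference<ArtMethod> then goes at 0(%rsp).
  return 0;
}

This prints `subq $72, %rsp` and a spill at 64(%rsp). RemoveFrame in the next hunk reverses the layout: it reloads the XMM slots, pops the GPRs in the opposite order, and returns.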
@@ -1707,9 +1722,24 @@
 void X86_64Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
+  int gpr_count = 0;
+  // Reload the XMM registers from their spill slots.
+  int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset += sizeof(double);
+      movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+    } else {
+      gpr_count++;
+    }
+  }
+  addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize));
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    if (spill.IsCpuRegister()) {
+      popq(spill.AsCpuRegister());
+    }
   }
   ret();
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index f7bad8b..dc1758f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -246,11 +246,9 @@
   str << "pushq %rsi\n";
   str << "pushq %r10\n";
   // 2) Move down the stack pointer.
-  ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 +
-      sizeof(StackReference<mirror::ArtMethod>) + 8;
-  str << "addq $" << displacement << ", %rsp\n";
-  // 3) Make space for method reference, and store it.
-  str << "subq $4, %rsp\n";
+  ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8);
+  str << "subq $" << displacement << ", %rsp\n";
+  // 3) Store method reference.
   str << "movl %edi, (%rsp)\n";
   // 4) Entry spills.
   str << "movq %rax, " << frame_size + 0 << "(%rsp)\n";
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 80e7724..6d861d4 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -40,7 +40,8 @@
 #include "dex_file-inl.h"
 #include "dex/pass_driver_me_opts.h"
 #include "dex/verification_results.h"
-#include "driver/compiler_callbacks_impl.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "elf_fixup.h"
@@ -55,7 +56,6 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "oat_writer.h"
-#include "object_utils.h"
 #include "os.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
@@ -232,7 +232,7 @@
 class Dex2Oat {
  public:
   static bool Create(Dex2Oat** p_dex2oat,
-                     const Runtime::Options& runtime_options,
+                     const RuntimeOptions& runtime_options,
                      const CompilerOptions& compiler_options,
                      Compiler::Kind compiler_kind,
                      InstructionSet instruction_set,
@@ -336,7 +336,10 @@
                                       bool dump_passes,
                                       TimingLogger& timings,
                                       CumulativeLogger& compiler_phases_timings,
-                                      std::string profile_file) {
+                                      std::string profile_file,
+                                      SafeMap<std::string, std::string>* key_value_store) {
+    CHECK(key_value_store != nullptr);
+
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
     Thread* self = Thread::Current();
@@ -356,18 +359,18 @@
     }
 
     std::unique_ptr<CompilerDriver> driver(new CompilerDriver(compiler_options_,
-                                                        verification_results_,
-                                                        method_inliner_map_,
-                                                        compiler_kind_,
-                                                        instruction_set_,
-                                                        instruction_set_features_,
-                                                        image,
-                                                        image_classes.release(),
-                                                        thread_count_,
-                                                        dump_stats,
-                                                        dump_passes,
-                                                        &compiler_phases_timings,
-                                                        profile_file));
+                                                              verification_results_,
+                                                              method_inliner_map_,
+                                                              compiler_kind_,
+                                                              instruction_set_,
+                                                              instruction_set_features_,
+                                                              image,
+                                                              image_classes.release(),
+                                                              thread_count_,
+                                                              dump_stats,
+                                                              dump_passes,
+                                                              &compiler_phases_timings,
+                                                              profile_file));
 
     driver->GetCompiler()->SetBitcodeFileName(*driver.get(), bitcode_filename);
 
@@ -386,11 +389,15 @@
       image_file_location = image_space->GetImageFilename();
     }
 
+    if (!image_file_location.empty()) {
+      key_value_store->Put(OatHeader::kImageLocationKey, image_file_location);
+    }
+
     OatWriter oat_writer(dex_files, image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
-                         image_file_location,
                          driver.get(),
-                         &timings);
+                         &timings,
+                         key_value_store);
 
     t2.NewTiming("Writing ELF");
     if (!driver->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) {
@@ -452,7 +459,7 @@
     CHECK(method_inliner_map != nullptr);
   }
 
-  bool CreateRuntime(const Runtime::Options& runtime_options, InstructionSet instruction_set)
+  bool CreateRuntime(const RuntimeOptions& runtime_options, InstructionSet instruction_set)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
     if (!Runtime::Create(runtime_options, false)) {
       LOG(ERROR) << "Failed to create runtime";
@@ -467,6 +474,7 @@
       }
     }
     runtime->GetClassLinker()->FixupDexCaches(runtime->GetResolutionMethod());
+    runtime->GetClassLinker()->RunRootClinits();
     runtime_ = runtime;
     return true;
   }
@@ -733,20 +741,6 @@
   *parsed_value = value;
 }
 
-void CheckExplicitCheckOptions(InstructionSet isa, bool* explicit_null_checks,
-                               bool* explicit_so_checks, bool* explicit_suspend_checks) {
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      break;  // All checks implemented, leave as is.
-
-    default:  // No checks implemented, reset all to explicit checks.
-      *explicit_null_checks = true;
-      *explicit_so_checks = true;
-      *explicit_suspend_checks = true;
-  }
-}
-
 static int dex2oat(int argc, char** argv) {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
@@ -830,10 +824,10 @@
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
 
-  bool explicit_null_checks = true;
-  bool explicit_so_checks = true;
-  bool explicit_suspend_checks = true;
-  bool has_explicit_checks_options = false;
+  // Checks are all explicit until we know the architecture.
+  bool implicit_null_checks = false;
+  bool implicit_so_checks = false;
+  bool implicit_suspend_checks = false;
 
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
@@ -1010,31 +1004,6 @@
     } else if (option.starts_with("--dump-cfg-passes=")) {
       std::string dump_passes = option.substr(strlen("--dump-cfg-passes=")).data();
       PassDriverMEOpts::SetDumpPassList(dump_passes);
-    } else if (option.starts_with("--implicit-checks=")) {
-      std::string checks = option.substr(strlen("--implicit-checks=")).data();
-      std::vector<std::string> checkvec;
-      Split(checks, ',', checkvec);
-      for (auto& str : checkvec) {
-        std::string val = Trim(str);
-        if (val == "none") {
-          explicit_null_checks = true;
-          explicit_so_checks = true;
-          explicit_suspend_checks = true;
-        } else if (val == "null") {
-          explicit_null_checks = false;
-        } else if (val == "suspend") {
-          explicit_suspend_checks = false;
-        } else if (val == "stack") {
-          explicit_so_checks = false;
-        } else if (val == "all") {
-          explicit_null_checks = false;
-          explicit_so_checks = false;
-          explicit_suspend_checks = false;
-        } else {
-          Usage("--implicit-checks passed non-recognized value %s", val.c_str());
-        }
-      }
-      has_explicit_checks_options = true;
     } else if (option == "--include-patch-information") {
       include_patch_information = true;
       explicit_include_patch_information = true;
@@ -1167,31 +1136,43 @@
     Usage("Unknown --compiler-filter value %s", compiler_filter_string);
   }
 
-  CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
-                            &explicit_suspend_checks);
-
   if (!explicit_include_patch_information) {
     include_patch_information =
         (compiler_kind == Compiler::kQuick && CompilerOptions::kDefaultIncludePatchInformation);
   }
 
-  CompilerOptions compiler_options(compiler_filter,
-                                   huge_method_threshold,
-                                   large_method_threshold,
-                                   small_method_threshold,
-                                   tiny_method_threshold,
-                                   num_dex_methods_threshold,
-                                   generate_gdb_information,
-                                   include_patch_information,
-                                   top_k_profile_threshold,
-                                   include_debug_symbols,
-                                   explicit_null_checks,
-                                   explicit_so_checks,
-                                   explicit_suspend_checks
+  // Set the compilation target's implicit checks options.
+  switch (instruction_set) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+      implicit_null_checks = true;
+      implicit_so_checks = true;
+      break;
+
+    default:
+      // Defaults are correct.
+      break;
+  }
+
+  std::unique_ptr<CompilerOptions> compiler_options(new CompilerOptions(compiler_filter,
+                                                                        huge_method_threshold,
+                                                                        large_method_threshold,
+                                                                        small_method_threshold,
+                                                                        tiny_method_threshold,
+                                                                        num_dex_methods_threshold,
+                                                                        generate_gdb_information,
+                                                                        include_patch_information,
+                                                                        top_k_profile_threshold,
+                                                                        include_debug_symbols,
+                                                                        implicit_null_checks,
+                                                                        implicit_so_checks,
+                                                                        implicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
-                                   , compiler_options.sea_ir_ = true;
+                                                                        , compiler_options.sea_ir_ =
+                                                                              true;
 #endif
-                                   );  // NOLINT(whitespace/parens)
+  ));  // NOLINT(whitespace/parens)
 
   // Done with usage checks, enable watchdog if requested
   WatchDog watch_dog(watch_dog_enabled);
@@ -1220,7 +1201,7 @@
   timings.StartTiming("dex2oat Setup");
   LOG(INFO) << CommandLine();
 
-  Runtime::Options runtime_options;
+  RuntimeOptions runtime_options;
   std::vector<const DexFile*> boot_class_path;
   if (boot_image_option.empty()) {
     size_t failure_count = OpenDexFiles(dex_filenames, dex_locations, boot_class_path);
@@ -1236,9 +1217,10 @@
     runtime_options.push_back(std::make_pair(runtime_args[i], nullptr));
   }
 
-  VerificationResults verification_results(&compiler_options);
+  std::unique_ptr<VerificationResults> verification_results(new VerificationResults(
+                                                            compiler_options.get()));
   DexFileToMethodInlinerMap method_inliner_map;
-  CompilerCallbacksImpl callbacks(&verification_results, &method_inliner_map);
+  QuickCompilerCallbacks callbacks(verification_results.get(), &method_inliner_map);
   runtime_options.push_back(std::make_pair("compilercallbacks", &callbacks));
   runtime_options.push_back(
       std::make_pair("imageinstructionset",
@@ -1247,11 +1229,11 @@
   Dex2Oat* p_dex2oat;
   if (!Dex2Oat::Create(&p_dex2oat,
                        runtime_options,
-                       compiler_options,
+                       *compiler_options,
                        compiler_kind,
                        instruction_set,
                        instruction_set_features,
-                       &verification_results,
+                       verification_results.get(),
                        &method_inliner_map,
                        thread_count)) {
     LOG(ERROR) << "Failed to create dex2oat";
@@ -1259,27 +1241,6 @@
   }
   std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
 
-  // TODO: Not sure whether it's a good idea to allow anything else but the runtime option in
-  // this case at all, as we'll have to throw away produced code for a mismatch.
-  if (!has_explicit_checks_options) {
-    bool cross_compiling = true;
-    switch (kRuntimeISA) {
-      case kArm:
-      case kThumb2:
-        cross_compiling = instruction_set != kArm && instruction_set != kThumb2;
-        break;
-      default:
-        cross_compiling = instruction_set != kRuntimeISA;
-        break;
-    }
-    if (!cross_compiling) {
-      Runtime* runtime = Runtime::Current();
-      compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks());
-      compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks());
-      compiler_options.SetExplicitSuspendChecks(runtime->ExplicitSuspendChecks());
-    }
-  }
-
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
@@ -1365,32 +1326,50 @@
    * If we're not in interpret-only or verify-none mode, go ahead and compile small applications.
    * Don't bother to check if we're doing the image.
    */
-  if (!image && compiler_options.IsCompilationEnabled()) {
+  if (!image && compiler_options->IsCompilationEnabled()) {
     size_t num_methods = 0;
     for (size_t i = 0; i != dex_files.size(); ++i) {
       const DexFile* dex_file = dex_files[i];
       CHECK(dex_file != nullptr);
       num_methods += dex_file->NumMethodIds();
     }
-    if (num_methods <= compiler_options.GetNumDexMethodsThreshold()) {
-      compiler_options.SetCompilerFilter(CompilerOptions::kSpeed);
+    if (num_methods <= compiler_options->GetNumDexMethodsThreshold()) {
+      compiler_options->SetCompilerFilter(CompilerOptions::kSpeed);
       VLOG(compiler) << "Below method threshold, compiling anyways";
     }
   }
 
+  // Fill some values into the key-value store for the oat header.
+  std::unique_ptr<SafeMap<std::string, std::string> > key_value_store(
+      new SafeMap<std::string, std::string>());
+
+  // Record the dex2oat command line and the host instruction set.
+  std::ostringstream oss;
+  for (int i = 0; i < argc; ++i) {
+    if (i > 0) {
+      oss << ' ';
+    }
+    oss << argv[i];
+  }
+  key_value_store->Put(OatHeader::kDex2OatCmdLineKey, oss.str());
+  oss.str("");  // Reset.
+  oss << kRuntimeISA;
+  key_value_store->Put(OatHeader::kDex2OatHostKey, oss.str());
+
   std::unique_ptr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option,
-                                                                  android_root,
-                                                                  is_host,
-                                                                  dex_files,
-                                                                  oat_file.get(),
-                                                                  bitcode_filename,
-                                                                  image,
-                                                                  image_classes,
-                                                                  dump_stats,
-                                                                  dump_passes,
-                                                                  timings,
-                                                                  compiler_phases_timings,
-                                                                  profile_file));
+                                                                        android_root,
+                                                                        is_host,
+                                                                        dex_files,
+                                                                        oat_file.get(),
+                                                                        bitcode_filename,
+                                                                        image,
+                                                                        image_classes,
+                                                                        dump_stats,
+                                                                        dump_passes,
+                                                                        timings,
+                                                                        compiler_phases_timings,
+                                                                        profile_file,
+                                                                        key_value_store.get()));
 
   if (compiler.get() == nullptr) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index a6f9a8a..101a55d 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -125,10 +125,11 @@
   DumpAddrReg(os, rex, reg_num);
 }
 
-static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg) {
+static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg,
+                          bool byte_operand, uint8_t size_override) {
   bool rex_b = (rex & REX_B) != 0;
   size_t reg_num = rex_b ? (reg + 8) : reg;
-  DumpReg0(os, rex, reg_num, false, 0);
+  DumpReg0(os, rex, reg_num, byte_operand, size_override);
 }
 
 enum SegmentPrefix {
@@ -955,6 +956,7 @@
     immediate_bytes = 1;
     byte_operand = true;
     reg_in_opcode = true;
+    byte_operand = true;
     break;
   case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF:
     if (rex == 0x48) {
@@ -1079,7 +1081,7 @@
   uint8_t rex_w = (supports_rex_ && target_specific) ? (rex | 0x48) : rex;
   if (reg_in_opcode) {
     DCHECK(!has_modrm);
-    DumpOpcodeReg(args, rex_w, *instr & 0x7);
+    DumpOpcodeReg(args, rex_w, *instr & 0x7, byte_operand, prefix[2]);
   }
   instr++;
   uint32_t address_bits = 0;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 12970fc..b8f20f3 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -29,6 +29,7 @@
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "disassembler.h"
+#include "field_helper.h"
 #include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
@@ -45,7 +46,6 @@
 #include "noop_compiler_callbacks.h"
 #include "oat.h"
 #include "oat_file-inl.h"
-#include "object_utils.h"
 #include "os.h"
 #include "runtime.h"
 #include "safe_map.h"
@@ -171,10 +171,18 @@
     os << "IMAGE FILE LOCATION OAT BEGIN:\n";
     os << StringPrintf("0x%08x\n\n", oat_header.GetImageFileLocationOatDataBegin());
 
-    os << "IMAGE FILE LOCATION:\n";
-    const std::string image_file_location(oat_header.GetImageFileLocation());
-    os << image_file_location;
-    os << "\n\n";
+    // Print the key-value store.
+    {
+      os << "KEY VALUE STORE:\n";
+      size_t index = 0;
+      const char* key;
+      const char* value;
+      while (oat_header.GetStoreKeyValuePairByIndex(index, &key, &value)) {
+        os << key << " = " << value << "\n";
+        index++;
+      }
+      os << "\n";
+    }
 
     os << "BEGIN:\n";
     os << reinterpret_cast<const void*>(oat_file_.Begin()) << "\n\n";
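With the dedicated image-file-location field gone, the oat header exposes an open-ended key-value store, and oatdump simply walks it by index as above. A lookup helper for a single key can be built on the same accessor; a sketch (the helper itself is hypothetical, only GetStoreKeyValuePairByIndex is taken from the code above):

#include <cstddef>
#include <cstring>

#include "oat.h"  // ART's OatHeader.

// Hypothetical lookup helper; returns nullptr when the key is absent.
const char* FindOatHeaderValue(const art::OatHeader& oat_header,
                               const char* wanted_key) {
  size_t index = 0;
  const char* key;
  const char* value;
  while (oat_header.GetStoreKeyValuePairByIndex(index, &key, &value)) {
    if (strcmp(key, wanted_key) == 0) {
      return value;
    }
    ++index;
  }
  return nullptr;
}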
@@ -976,7 +984,7 @@
   const void* GetQuickOatCodeBegin(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const void* quick_code = m->GetEntryPointFromQuickCompiledCode();
-    if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+    if (quick_code == Runtime::Current()->GetClassLinker()->GetQuickResolutionTrampoline()) {
       quick_code = oat_dumper_->GetQuickOatCode(m);
     }
     if (oat_dumper_->GetInstructionSet() == kThumb2) {
@@ -1540,7 +1548,7 @@
     return EXIT_SUCCESS;
   }
 
-  Runtime::Options options;
+  RuntimeOptions options;
   std::string image_option;
   std::string oat_option;
   std::string boot_image_option;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index dcf8c70..85b4e6d 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -26,6 +26,7 @@
 #include "base/stringprintf.h"
 #include "elf_utils.h"
 #include "elf_file.h"
+#include "gc/space/image_space.h"
 #include "image.h"
 #include "instruction_set.h"
 #include "mirror/art_field.h"
@@ -92,7 +93,7 @@
   }
 
   // Set up the runtime
-  Runtime::Options options;
+  RuntimeOptions options;
   NoopCompilerCallbacks callbacks;
   options.push_back(std::make_pair("compilercallbacks", &callbacks));
   std::string img = "-Ximage:" + image_location;
@@ -176,7 +177,7 @@
   }
 
   // Set up the runtime
-  Runtime::Options options;
+  RuntimeOptions options;
   NoopCompilerCallbacks callbacks;
   options.push_back(std::make_pair("compilercallbacks", &callbacks));
   std::string img = "-Ximage:" + image_location;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7f5cf0c..d2fc229 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -19,300 +19,304 @@
 include art/build/Android.common_build.mk
 
 LIBART_COMMON_SRC_FILES := \
-	atomic.cc.arm \
-	barrier.cc \
-	base/allocator.cc \
-	base/bit_vector.cc \
-	base/hex_dump.cc \
-	base/logging.cc \
-	base/mutex.cc \
-	base/scoped_flock.cc \
-	base/stringpiece.cc \
-	base/stringprintf.cc \
-	base/timing_logger.cc \
-	base/unix_file/fd_file.cc \
-	base/unix_file/mapped_file.cc \
-	base/unix_file/null_file.cc \
-	base/unix_file/random_access_file_utils.cc \
-	base/unix_file/string_file.cc \
-	check_jni.cc \
-	class_linker.cc \
-	common_throws.cc \
-	debugger.cc \
-	dex_file.cc \
-	dex_file_verifier.cc \
-	dex_instruction.cc \
-	elf_file.cc \
-	gc/allocator/dlmalloc.cc \
-	gc/allocator/rosalloc.cc \
-	gc/accounting/card_table.cc \
-	gc/accounting/gc_allocator.cc \
-	gc/accounting/heap_bitmap.cc \
-	gc/accounting/mod_union_table.cc \
-	gc/accounting/remembered_set.cc \
-	gc/accounting/space_bitmap.cc \
-	gc/collector/concurrent_copying.cc \
-	gc/collector/garbage_collector.cc \
-	gc/collector/immune_region.cc \
-	gc/collector/mark_compact.cc \
-	gc/collector/mark_sweep.cc \
-	gc/collector/partial_mark_sweep.cc \
-	gc/collector/semi_space.cc \
-	gc/collector/sticky_mark_sweep.cc \
-	gc/gc_cause.cc \
-	gc/heap.cc \
-	gc/reference_processor.cc \
-	gc/reference_queue.cc \
-	gc/space/bump_pointer_space.cc \
-	gc/space/dlmalloc_space.cc \
-	gc/space/image_space.cc \
-	gc/space/large_object_space.cc \
-	gc/space/malloc_space.cc \
-	gc/space/rosalloc_space.cc \
-	gc/space/space.cc \
-	gc/space/zygote_space.cc \
-	hprof/hprof.cc \
-	image.cc \
-	indirect_reference_table.cc \
-	instruction_set.cc \
-	instrumentation.cc \
-	intern_table.cc \
-	interpreter/interpreter.cc \
-	interpreter/interpreter_common.cc \
-	interpreter/interpreter_switch_impl.cc \
-	jdwp/jdwp_event.cc \
-	jdwp/jdwp_expand_buf.cc \
-	jdwp/jdwp_handler.cc \
-	jdwp/jdwp_main.cc \
-	jdwp/jdwp_request.cc \
-	jdwp/jdwp_socket.cc \
-	jdwp/object_registry.cc \
-	jni_internal.cc \
-	jobject_comparator.cc \
-	mem_map.cc \
-	memory_region.cc \
-	mirror/art_field.cc \
-	mirror/art_method.cc \
-	mirror/array.cc \
-	mirror/class.cc \
-	mirror/dex_cache.cc \
-	mirror/object.cc \
-	mirror/stack_trace_element.cc \
-	mirror/string.cc \
-	mirror/throwable.cc \
-	monitor.cc \
-	native/dalvik_system_DexFile.cc \
-	native/dalvik_system_VMDebug.cc \
-	native/dalvik_system_VMRuntime.cc \
-	native/dalvik_system_VMStack.cc \
-	native/dalvik_system_ZygoteHooks.cc \
-	native/java_lang_Class.cc \
-	native/java_lang_DexCache.cc \
-	native/java_lang_Object.cc \
-	native/java_lang_Runtime.cc \
-	native/java_lang_String.cc \
-	native/java_lang_System.cc \
-	native/java_lang_Thread.cc \
-	native/java_lang_Throwable.cc \
-	native/java_lang_VMClassLoader.cc \
-	native/java_lang_ref_Reference.cc \
-	native/java_lang_reflect_Array.cc \
-	native/java_lang_reflect_Constructor.cc \
-	native/java_lang_reflect_Field.cc \
-	native/java_lang_reflect_Method.cc \
-	native/java_lang_reflect_Proxy.cc \
-	native/java_util_concurrent_atomic_AtomicLong.cc \
-	native/org_apache_harmony_dalvik_ddmc_DdmServer.cc \
-	native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc \
-	native/sun_misc_Unsafe.cc \
-	oat.cc \
-	oat_file.cc \
-	offsets.cc \
-	os_linux.cc \
-	parsed_options.cc \
-	primitive.cc \
-	quick_exception_handler.cc \
-	quick/inline_method_analyser.cc \
-	reference_table.cc \
-	reflection.cc \
-	runtime.cc \
-	signal_catcher.cc \
-	stack.cc \
-	thread.cc \
-	thread_list.cc \
-	thread_pool.cc \
-	throw_location.cc \
-	trace.cc \
-	transaction.cc \
-	profiler.cc \
-	fault_handler.cc \
-	utf.cc \
-	utils.cc \
-	verifier/dex_gc_map.cc \
-	verifier/instruction_flags.cc \
-	verifier/method_verifier.cc \
-	verifier/reg_type.cc \
-	verifier/reg_type_cache.cc \
-	verifier/register_line.cc \
-	well_known_classes.cc \
-	zip_archive.cc
+  atomic.cc.arm \
+  barrier.cc \
+  base/allocator.cc \
+  base/bit_vector.cc \
+  base/hex_dump.cc \
+  base/logging.cc \
+  base/mutex.cc \
+  base/scoped_flock.cc \
+  base/stringpiece.cc \
+  base/stringprintf.cc \
+  base/timing_logger.cc \
+  base/unix_file/fd_file.cc \
+  base/unix_file/mapped_file.cc \
+  base/unix_file/null_file.cc \
+  base/unix_file/random_access_file_utils.cc \
+  base/unix_file/string_file.cc \
+  check_jni.cc \
+  class_linker.cc \
+  common_throws.cc \
+  debugger.cc \
+  dex_file.cc \
+  dex_file_verifier.cc \
+  dex_instruction.cc \
+  elf_file.cc \
+  field_helper.cc \
+  gc/allocator/dlmalloc.cc \
+  gc/allocator/rosalloc.cc \
+  gc/accounting/card_table.cc \
+  gc/accounting/gc_allocator.cc \
+  gc/accounting/heap_bitmap.cc \
+  gc/accounting/mod_union_table.cc \
+  gc/accounting/remembered_set.cc \
+  gc/accounting/space_bitmap.cc \
+  gc/collector/concurrent_copying.cc \
+  gc/collector/garbage_collector.cc \
+  gc/collector/immune_region.cc \
+  gc/collector/mark_compact.cc \
+  gc/collector/mark_sweep.cc \
+  gc/collector/partial_mark_sweep.cc \
+  gc/collector/semi_space.cc \
+  gc/collector/sticky_mark_sweep.cc \
+  gc/gc_cause.cc \
+  gc/heap.cc \
+  gc/reference_processor.cc \
+  gc/reference_queue.cc \
+  gc/space/bump_pointer_space.cc \
+  gc/space/dlmalloc_space.cc \
+  gc/space/image_space.cc \
+  gc/space/large_object_space.cc \
+  gc/space/malloc_space.cc \
+  gc/space/rosalloc_space.cc \
+  gc/space/space.cc \
+  gc/space/zygote_space.cc \
+  hprof/hprof.cc \
+  image.cc \
+  indirect_reference_table.cc \
+  instruction_set.cc \
+  instrumentation.cc \
+  intern_table.cc \
+  interpreter/interpreter.cc \
+  interpreter/interpreter_common.cc \
+  interpreter/interpreter_switch_impl.cc \
+  jdwp/jdwp_event.cc \
+  jdwp/jdwp_expand_buf.cc \
+  jdwp/jdwp_handler.cc \
+  jdwp/jdwp_main.cc \
+  jdwp/jdwp_request.cc \
+  jdwp/jdwp_socket.cc \
+  jdwp/object_registry.cc \
+  jni_internal.cc \
+  jobject_comparator.cc \
+  mem_map.cc \
+  memory_region.cc \
+  method_helper.cc \
+  mirror/art_field.cc \
+  mirror/art_method.cc \
+  mirror/array.cc \
+  mirror/class.cc \
+  mirror/dex_cache.cc \
+  mirror/object.cc \
+  mirror/reference.cc \
+  mirror/stack_trace_element.cc \
+  mirror/string.cc \
+  mirror/throwable.cc \
+  monitor.cc \
+  native/dalvik_system_DexFile.cc \
+  native/dalvik_system_VMDebug.cc \
+  native/dalvik_system_VMRuntime.cc \
+  native/dalvik_system_VMStack.cc \
+  native/dalvik_system_ZygoteHooks.cc \
+  native/java_lang_Class.cc \
+  native/java_lang_DexCache.cc \
+  native/java_lang_Object.cc \
+  native/java_lang_Runtime.cc \
+  native/java_lang_String.cc \
+  native/java_lang_System.cc \
+  native/java_lang_Thread.cc \
+  native/java_lang_Throwable.cc \
+  native/java_lang_VMClassLoader.cc \
+  native/java_lang_ref_Reference.cc \
+  native/java_lang_reflect_Array.cc \
+  native/java_lang_reflect_Constructor.cc \
+  native/java_lang_reflect_Field.cc \
+  native/java_lang_reflect_Method.cc \
+  native/java_lang_reflect_Proxy.cc \
+  native/java_util_concurrent_atomic_AtomicLong.cc \
+  native/org_apache_harmony_dalvik_ddmc_DdmServer.cc \
+  native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc \
+  native/sun_misc_Unsafe.cc \
+  oat.cc \
+  oat_file.cc \
+  object_lock.cc \
+  offsets.cc \
+  os_linux.cc \
+  parsed_options.cc \
+  primitive.cc \
+  quick_exception_handler.cc \
+  quick/inline_method_analyser.cc \
+  reference_table.cc \
+  reflection.cc \
+  runtime.cc \
+  signal_catcher.cc \
+  stack.cc \
+  thread.cc \
+  thread_list.cc \
+  thread_pool.cc \
+  throw_location.cc \
+  trace.cc \
+  transaction.cc \
+  profiler.cc \
+  fault_handler.cc \
+  utf.cc \
+  utils.cc \
+  verifier/dex_gc_map.cc \
+  verifier/instruction_flags.cc \
+  verifier/method_verifier.cc \
+  verifier/reg_type.cc \
+  verifier/reg_type_cache.cc \
+  verifier/register_line.cc \
+  well_known_classes.cc \
+  zip_archive.cc
 
 LIBART_COMMON_SRC_FILES += \
-	arch/context.cc \
-	arch/memcmp16.cc \
-	arch/arm/registers_arm.cc \
-	arch/arm64/registers_arm64.cc \
-	arch/x86/registers_x86.cc \
-	arch/mips/registers_mips.cc \
-	entrypoints/entrypoint_utils.cc \
-	entrypoints/interpreter/interpreter_entrypoints.cc \
-	entrypoints/jni/jni_entrypoints.cc \
-	entrypoints/math_entrypoints.cc \
-	entrypoints/portable/portable_alloc_entrypoints.cc \
-	entrypoints/portable/portable_cast_entrypoints.cc \
-	entrypoints/portable/portable_dexcache_entrypoints.cc \
-	entrypoints/portable/portable_field_entrypoints.cc \
-	entrypoints/portable/portable_fillarray_entrypoints.cc \
-	entrypoints/portable/portable_invoke_entrypoints.cc \
-	entrypoints/portable/portable_jni_entrypoints.cc \
-	entrypoints/portable/portable_lock_entrypoints.cc \
-	entrypoints/portable/portable_thread_entrypoints.cc \
-	entrypoints/portable/portable_throw_entrypoints.cc \
-	entrypoints/portable/portable_trampoline_entrypoints.cc \
-	entrypoints/quick/quick_alloc_entrypoints.cc \
-	entrypoints/quick/quick_cast_entrypoints.cc \
-	entrypoints/quick/quick_deoptimization_entrypoints.cc \
-	entrypoints/quick/quick_dexcache_entrypoints.cc \
-	entrypoints/quick/quick_field_entrypoints.cc \
-	entrypoints/quick/quick_fillarray_entrypoints.cc \
-	entrypoints/quick/quick_instrumentation_entrypoints.cc \
-	entrypoints/quick/quick_jni_entrypoints.cc \
-	entrypoints/quick/quick_lock_entrypoints.cc \
-	entrypoints/quick/quick_math_entrypoints.cc \
-	entrypoints/quick/quick_thread_entrypoints.cc \
-	entrypoints/quick/quick_throw_entrypoints.cc \
-	entrypoints/quick/quick_trampoline_entrypoints.cc
+  arch/context.cc \
+  arch/memcmp16.cc \
+  arch/arm/registers_arm.cc \
+  arch/arm64/registers_arm64.cc \
+  arch/x86/registers_x86.cc \
+  arch/mips/registers_mips.cc \
+  entrypoints/entrypoint_utils.cc \
+  entrypoints/interpreter/interpreter_entrypoints.cc \
+  entrypoints/jni/jni_entrypoints.cc \
+  entrypoints/math_entrypoints.cc \
+  entrypoints/portable/portable_alloc_entrypoints.cc \
+  entrypoints/portable/portable_cast_entrypoints.cc \
+  entrypoints/portable/portable_dexcache_entrypoints.cc \
+  entrypoints/portable/portable_field_entrypoints.cc \
+  entrypoints/portable/portable_fillarray_entrypoints.cc \
+  entrypoints/portable/portable_invoke_entrypoints.cc \
+  entrypoints/portable/portable_jni_entrypoints.cc \
+  entrypoints/portable/portable_lock_entrypoints.cc \
+  entrypoints/portable/portable_thread_entrypoints.cc \
+  entrypoints/portable/portable_throw_entrypoints.cc \
+  entrypoints/portable/portable_trampoline_entrypoints.cc \
+  entrypoints/quick/quick_alloc_entrypoints.cc \
+  entrypoints/quick/quick_cast_entrypoints.cc \
+  entrypoints/quick/quick_deoptimization_entrypoints.cc \
+  entrypoints/quick/quick_dexcache_entrypoints.cc \
+  entrypoints/quick/quick_field_entrypoints.cc \
+  entrypoints/quick/quick_fillarray_entrypoints.cc \
+  entrypoints/quick/quick_instrumentation_entrypoints.cc \
+  entrypoints/quick/quick_jni_entrypoints.cc \
+  entrypoints/quick/quick_lock_entrypoints.cc \
+  entrypoints/quick/quick_math_entrypoints.cc \
+  entrypoints/quick/quick_thread_entrypoints.cc \
+  entrypoints/quick/quick_throw_entrypoints.cc \
+  entrypoints/quick/quick_trampoline_entrypoints.cc
 
 # Source files that only compile with GCC.
 LIBART_GCC_ONLY_SRC_FILES := \
-	interpreter/interpreter_goto_table_impl.cc
+  interpreter/interpreter_goto_table_impl.cc
 
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
 LIBART_TARGET_SRC_FILES := \
-	$(LIBART_COMMON_SRC_FILES) \
-	base/logging_android.cc \
-	jdwp/jdwp_adb.cc \
-	monitor_android.cc \
-	runtime_android.cc \
-	thread_android.cc
+  $(LIBART_COMMON_SRC_FILES) \
+  base/logging_android.cc \
+  jdwp/jdwp_adb.cc \
+  monitor_android.cc \
+  runtime_android.cc \
+  thread_android.cc
 
 LIBART_TARGET_SRC_FILES_arm := \
-	arch/arm/context_arm.cc.arm \
-	arch/arm/entrypoints_init_arm.cc \
-	arch/arm/jni_entrypoints_arm.S \
-	arch/arm/memcmp16_arm.S \
-	arch/arm/portable_entrypoints_arm.S \
-	arch/arm/quick_entrypoints_arm.S \
-	arch/arm/arm_sdiv.S \
-	arch/arm/thread_arm.cc \
-	arch/arm/fault_handler_arm.cc
+  arch/arm/context_arm.cc.arm \
+  arch/arm/entrypoints_init_arm.cc \
+  arch/arm/jni_entrypoints_arm.S \
+  arch/arm/memcmp16_arm.S \
+  arch/arm/portable_entrypoints_arm.S \
+  arch/arm/quick_entrypoints_arm.S \
+  arch/arm/arm_sdiv.S \
+  arch/arm/thread_arm.cc \
+  arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
-	arch/arm64/context_arm64.cc \
-	arch/arm64/entrypoints_init_arm64.cc \
-	arch/arm64/jni_entrypoints_arm64.S \
-	arch/arm64/memcmp16_arm64.S \
-	arch/arm64/portable_entrypoints_arm64.S \
-	arch/arm64/quick_entrypoints_arm64.S \
-	arch/arm64/thread_arm64.cc \
-	monitor_pool.cc \
-	arch/arm64/fault_handler_arm64.cc
+  arch/arm64/context_arm64.cc \
+  arch/arm64/entrypoints_init_arm64.cc \
+  arch/arm64/jni_entrypoints_arm64.S \
+  arch/arm64/memcmp16_arm64.S \
+  arch/arm64/portable_entrypoints_arm64.S \
+  arch/arm64/quick_entrypoints_arm64.S \
+  arch/arm64/thread_arm64.cc \
+  monitor_pool.cc \
+  arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
-	arch/x86/context_x86.cc \
-	arch/x86/entrypoints_init_x86.cc \
-	arch/x86/jni_entrypoints_x86.S \
-	arch/x86/portable_entrypoints_x86.S \
-	arch/x86/quick_entrypoints_x86.S \
-	arch/x86/thread_x86.cc \
-	arch/x86/fault_handler_x86.cc
+  arch/x86/context_x86.cc \
+  arch/x86/entrypoints_init_x86.cc \
+  arch/x86/jni_entrypoints_x86.S \
+  arch/x86/portable_entrypoints_x86.S \
+  arch/x86/quick_entrypoints_x86.S \
+  arch/x86/thread_x86.cc \
+  arch/x86/fault_handler_x86.cc
 
 LIBART_TARGET_SRC_FILES_x86 := \
-	$(LIBART_SRC_FILES_x86)
+  $(LIBART_SRC_FILES_x86)
 
 LIBART_SRC_FILES_x86_64 := \
-	arch/x86_64/context_x86_64.cc \
-	arch/x86_64/entrypoints_init_x86_64.cc \
-	arch/x86_64/jni_entrypoints_x86_64.S \
-	arch/x86_64/portable_entrypoints_x86_64.S \
-	arch/x86_64/quick_entrypoints_x86_64.S \
-	arch/x86_64/thread_x86_64.cc \
-	monitor_pool.cc \
-	arch/x86_64/fault_handler_x86_64.cc
+  arch/x86_64/context_x86_64.cc \
+  arch/x86_64/entrypoints_init_x86_64.cc \
+  arch/x86_64/jni_entrypoints_x86_64.S \
+  arch/x86_64/portable_entrypoints_x86_64.S \
+  arch/x86_64/quick_entrypoints_x86_64.S \
+  arch/x86_64/thread_x86_64.cc \
+  monitor_pool.cc \
+  arch/x86_64/fault_handler_x86_64.cc
 
 LIBART_TARGET_SRC_FILES_x86_64 := \
-	$(LIBART_SRC_FILES_x86_64) \
+  $(LIBART_SRC_FILES_x86_64) \
 
 LIBART_TARGET_SRC_FILES_mips := \
-	arch/mips/context_mips.cc \
-	arch/mips/entrypoints_init_mips.cc \
-	arch/mips/jni_entrypoints_mips.S \
-	arch/mips/memcmp16_mips.S \
-	arch/mips/portable_entrypoints_mips.S \
-	arch/mips/quick_entrypoints_mips.S \
-	arch/mips/thread_mips.cc \
-	arch/mips/fault_handler_mips.cc
+  arch/mips/context_mips.cc \
+  arch/mips/entrypoints_init_mips.cc \
+  arch/mips/jni_entrypoints_mips.S \
+  arch/mips/memcmp16_mips.S \
+  arch/mips/portable_entrypoints_mips.S \
+  arch/mips/quick_entrypoints_mips.S \
+  arch/mips/thread_mips.cc \
+  arch/mips/fault_handler_mips.cc
 
 ifeq ($(TARGET_ARCH),mips64)
 $(info TODOMips64: $(LOCAL_PATH)/Android.mk Add mips64 specific runtime files)
 endif # TARGET_ARCH != mips64
 
 LIBART_HOST_SRC_FILES := \
-	$(LIBART_COMMON_SRC_FILES) \
-	base/logging_linux.cc \
-	monitor_linux.cc \
-	runtime_linux.cc \
-	thread_linux.cc
+  $(LIBART_COMMON_SRC_FILES) \
+  base/logging_linux.cc \
+  monitor_linux.cc \
+  runtime_linux.cc \
+  thread_linux.cc
 
 LIBART_HOST_SRC_FILES_32 := \
-	$(LIBART_SRC_FILES_x86)
+  $(LIBART_SRC_FILES_x86)
 
 LIBART_HOST_SRC_FILES_64 := \
-	$(LIBART_SRC_FILES_x86_64)
+  $(LIBART_SRC_FILES_x86_64)
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
-	arch/x86_64/registers_x86_64.h \
-	base/mutex.h \
-	dex_file.h \
-	dex_instruction.h \
-	gc/collector/gc_type.h \
-	gc/space/space.h \
-	gc/heap.h \
-	indirect_reference_table.h \
-	instruction_set.h \
-	invoke_type.h \
-	jdwp/jdwp.h \
-	jdwp/jdwp_constants.h \
-	lock_word.h \
-	mirror/class.h \
-	oat.h \
-	object_callbacks.h \
-	quick/inline_method_analyser.h \
-	thread.h \
-	thread_state.h \
-	verifier/method_verifier.h
+  arch/x86_64/registers_x86_64.h \
+  base/mutex.h \
+  dex_file.h \
+  dex_instruction.h \
+  gc/collector/gc_type.h \
+  gc/space/space.h \
+  gc/heap.h \
+  indirect_reference_table.h \
+  instruction_set.h \
+  invoke_type.h \
+  jdwp/jdwp.h \
+  jdwp/jdwp_constants.h \
+  lock_word.h \
+  mirror/class.h \
+  oat.h \
+  object_callbacks.h \
+  quick/inline_method_analyser.h \
+  thread.h \
+  thread_state.h \
+  verifier/method_verifier.h
 
 LIBART_CFLAGS :=
 ifeq ($(ART_USE_PORTABLE_COMPILER),true)
   LIBART_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
-ifeq ($(MALLOC_IMPL),jemalloc)
-  LIBART_CFLAGS += -DUSE_JEMALLOC
-else
+ifeq ($(MALLOC_IMPL),dlmalloc)
   LIBART_CFLAGS += -DUSE_DLMALLOC
+else
+  LIBART_CFLAGS += -DUSE_JEMALLOC
 endif
 
 # $(1): target or host
@@ -412,6 +416,7 @@
     LOCAL_STATIC_LIBRARIES := libziparchive libz
   else # host
     LOCAL_STATIC_LIBRARIES += libcutils libziparchive-host libz libutils
+    LOCAL_SHARED_LIBRARIES += libsigchain
     LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($$(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 3fa09cb..8c6afd6 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -15,6 +15,7 @@
  */
 
 #include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -25,11 +26,11 @@
 
 // Interpreter entrypoints.
 extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper& mh,
-                                                 const DexFile::CodeItem* code_item,
-                                                 ShadowFrame* shadow_frame, JValue* result);
+                                                  const DexFile::CodeItem* code_item,
+                                                  ShadowFrame* shadow_frame, JValue* result);
 extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
-                                           const DexFile::CodeItem* code_item,
-                                           ShadowFrame* shadow_frame, JValue* result);
+                                                   const DexFile::CodeItem* code_item,
+                                                   ShadowFrame* shadow_frame, JValue* result);
 
 // Portable entrypoints.
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 2a82129..e22c56e 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -46,9 +46,10 @@
   return instr_size;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
   VLOG(signals) << "sp: " << *out_sp;
@@ -114,7 +115,7 @@
   uint32_t checkinst1 = 0xf8d90000 + Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
   uint16_t checkinst2 = 0x6800;
 
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
@@ -178,7 +179,7 @@
 // to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
@@ -205,7 +206,7 @@
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  Determine the address of the next
+  // that exists below the protected region.  Determine the address of the next
   // available valid address below the protected region.
   uintptr_t prevsp = sp;
   sp = pregion;
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 422e20cf..f353408 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -21,6 +21,7 @@
 
 // TODO Thread offsets need to be checked when on Aarch64.
 
+// Note: loads of these callee-save methods require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c19b79e..0c33d9c 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -15,6 +15,7 @@
  */
 
 #include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 74c3023..34eede6 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7907b6e..2201b55 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -31,6 +31,7 @@
     ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
 
     // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr x9, [x9, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #368
@@ -109,6 +110,7 @@
     ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
 
     // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr x9, [x9, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
@@ -280,6 +282,7 @@
     ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
 
     // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr x9, [x9, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 70a9619..d3e7d5e 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -14,11 +14,14 @@
  * limitations under the License.
  */
 
+#include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
+#include "atomic.h"
 
 namespace art {
 
@@ -194,11 +197,11 @@
   qpoints->pCmplDouble = CmplDouble;
   qpoints->pCmplFloat = CmplFloat;
   qpoints->pFmod = fmod;
-  qpoints->pL2d = __floatdidf;
+  qpoints->pL2d = art_l2d;
   qpoints->pFmodf = fmodf;
-  qpoints->pL2f = __floatdisf;
-  qpoints->pD2iz = __fixdfsi;
-  qpoints->pF2iz = __fixsfsi;
+  qpoints->pL2f = art_l2f;
+  qpoints->pD2iz = art_d2i;
+  qpoints->pF2iz = art_f2i;
   qpoints->pIdivmod = NULL;
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
@@ -234,6 +237,10 @@
   qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+
+  // Atomic 64-bit load/store
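+  // (Generated code uses these for volatile 64-bit accesses, which 32-bit
+  // MIPS cannot perform atomically with ordinary loads and stores.)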
+  qpoints->pA64Load = QuasiAtomic::Read64;
+  qpoints->pA64Store = QuasiAtomic::Write64;
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 1ecd7d9..5a64a69 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index eb490eb..25f9a5a 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <cstdio>
+
 #include "common_runtime_test.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
-
-#include <cstdio>
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
@@ -45,7 +46,7 @@
     }
   }
 
-  void SetUpRuntimeOptions(Runtime::Options *options) OVERRIDE {
+  void SetUpRuntimeOptions(RuntimeOptions *options) OVERRIDE {
     // Use a smaller heap
     for (std::pair<std::string, const void*>& pair : *options) {
       if (pair.first.find("-Xmx") == 0) {
@@ -1739,8 +1740,8 @@
   // Sanity check: check that there is a conflict for List.contains in ArrayList.
 
   mirror::Class* arraylist_class = soa.Decode<mirror::Class*>(arraylist_jclass);
-  mirror::ArtMethod* m = arraylist_class->GetImTable()->Get(
-      inf_contains->GetDexMethodIndex() % ClassLinker::kImtSize);
+  mirror::ArtMethod* m = arraylist_class->GetEmbeddedImTableEntry(
+      inf_contains->GetDexMethodIndex() % mirror::Class::kImtSize);
 
   if (!m->IsImtConflictMethod()) {
     LOG(WARNING) << "Test is meaningless, no IMT conflict in setup: " <<
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index b217cd6..a072996 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 7c1980e..435f280 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -21,7 +21,21 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
 
+#if defined(__APPLE__)
+#define ucontext __darwin_ucontext
+#define CTX_ESP uc_mcontext->__ss.__esp
+#define CTX_EIP uc_mcontext->__ss.__eip
+#define CTX_EAX uc_mcontext->__ss.__eax
+#else
+#define CTX_ESP uc_mcontext.gregs[REG_ESP]
+#define CTX_EIP uc_mcontext.gregs[REG_EIP]
+#define CTX_EAX uc_mcontext.gregs[REG_EAX]
+#endif
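+// The CTX_* macros above give uniform access to the saved ESP, EIP and EAX
+// in the signal ucontext across Linux and Darwin.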
 
 //
 // X86 specific fault handler functions.
@@ -29,19 +43,292 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_test_suspend();
+
+// From the x86 disassembler...
+enum SegmentPrefix {
+  kCs = 0x2e,
+  kSs = 0x36,
+  kDs = 0x3e,
+  kEs = 0x26,
+  kFs = 0x64,
+  kGs = 0x65,
+};
+
+// Get the size of an instruction in bytes.
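+// Note: this is not a general-purpose decoder.  It only has to be right for
+// the memory-access instructions the compiler emits at implicit check sites;
+// in particular, immediate operands are not decoded.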
+static uint32_t GetInstructionSize(uint8_t* pc) {
+  uint8_t* instruction_start = pc;
+  bool have_prefixes = true;
+  bool two_byte = false;
+
+  // Skip all the prefixes.
+  do {
+    switch (*pc) {
+        // Group 1 - lock and repeat prefixes:
+      case 0xF0:
+      case 0xF2:
+      case 0xF3:
+        // Group 2 - segment override prefixes:
+      case kCs:
+      case kSs:
+      case kDs:
+      case kEs:
+      case kFs:
+      case kGs:
+        // Group 3 - operand size override:
+      case 0x66:
+        // Group 4 - address size override:
+      case 0x67:
+        break;
+      default:
+        have_prefixes = false;
+        break;
+    }
+    if (have_prefixes) {
+      pc++;
+    }
+  } while (have_prefixes);
+
+#if defined(__x86_64__)
+  // Skip REX prefix if present.
+  if (*pc >= 0x40 && *pc <= 0x4F) {
+    ++pc;
+  }
+#endif
+
+  // Check for known instructions.
+  uint32_t known_length = 0;
+  switch (*pc) {
+  case 0x83:                // cmp [r + v], b: 4 byte instruction
+    known_length = 4;
+    break;
+  }
+
+  if (known_length > 0) {
+    VLOG(signals) << "known instruction with length " << known_length;
+    return known_length;
+  }
+
+  // Unknown instruction, work out length.
+
+  // Work out if we have a ModR/M byte.
+  uint8_t opcode = *pc++;
+  if (opcode == 0xf) {
+    two_byte = true;
+    opcode = *pc++;
+  }
+
+  bool has_modrm = false;         // Is ModR/M byte present?
+  uint8_t hi = opcode >> 4;       // Opcode high nybble.
+  uint8_t lo = opcode & 0b1111;   // Opcode low nybble.
+
+  // From the Intel opcode tables.
+  if (two_byte) {
+    has_modrm = true;   // TODO: all of these?
+  } else if (hi < 4) {
+    has_modrm = lo < 4 || (lo >= 8 && lo <= 0xb);
+  } else if (hi == 6) {
+    has_modrm = lo == 3 || lo == 9 || lo == 0xb;
+  } else if (hi == 8) {
+    has_modrm = lo != 0xd;
+  } else if (hi == 0xc) {
+    has_modrm = lo == 1 || lo == 2 || lo == 6 || lo == 7;
+  } else if (hi == 0xd) {
+    has_modrm = lo < 4;
+  } else if (hi == 0xf) {
+    has_modrm = lo == 6 || lo == 7;
+  }
+
+  if (has_modrm) {
+    uint8_t modrm = *pc++;
+    uint8_t mod = (modrm >> 6) & 0b11;
+    uint8_t rm = modrm & 0b111;
+    // A SIB byte follows the ModR/M byte when mod != 3 and r/m == 4.
+    if (mod != 3 && rm == 4) {
+      pc += 1;
+    }
+    switch (mod) {
+      case 0:
+        if (rm == 5) {
+          // No base register, just a 4 byte displacement.
+          pc += 4;
+        }
+        break;
+      case 1:
+        // 1 byte displacement.
+        pc += 1;
+        break;
+      case 2:
+        // 4 byte displacement.
+        pc += 4;
+        break;
+      case 3:
+        // Register operand, no displacement.
+        break;
+    }
+  }
+
+  VLOG(signals) << "calculated X86 instruction size is " << (pc - instruction_start);
+  return pc - instruction_start;
+}
+
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  *out_sp = static_cast<uintptr_t>(uc->CTX_ESP);
+  VLOG(signals) << "sp: " << std::hex << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in EAX.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->CTX_EAX);
+  } else {
+    // The method is at the top of the stack.
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
+  }
+
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
+  VLOG(signals) << HexDump(pc, 32, true, "PC ");
+
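+  // The faulting PC points at the faulting instruction itself; report the
+  // address of the following instruction as the return PC so the stack
+  // walker sees a normal call-return layout.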
+  uint32_t instr_size = GetInstructionSize(pc);
+  *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
 bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
+
+  uint32_t instr_size = GetInstructionSize(pc);
+  // We need to arrange for the signal handler to return to the null pointer
+  // exception generator.  The return address must be the address of the
+  // next instruction (this instruction + instruction size).  The return address
+  // is on the stack at the top address of the current frame.
+
+  // Push the return address onto the stack.
+  uint32_t retaddr = reinterpret_cast<uint32_t>(pc + instr_size);
+  uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+  *next_sp = retaddr;
+  uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
+}
+
+// A suspend check is done using the following instruction sequence:
+// 0xf720f1df:         648B058C000000      mov     eax, fs:[0x8c]  ; suspend_trigger
+// .. some intervening instructions.
+// 0xf720f1e6:                   8500      test    eax, [eax]
+
+// The offset from fs is Thread::ThreadSuspendTriggerOffset().
+// To check for a suspend check, we examine the instructions that caused
+// the fault.
+bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
+  // where xxx is the offset of the suspend trigger.
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+
+  VLOG(signals) << "Checking for suspension point";
+  uint8_t checkinst1[] = {0x64, 0x8b, 0x05, static_cast<uint8_t>(trigger & 0xff),
+      static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
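+  // Note: the two high displacement bytes are hard-coded to zero above, so
+  // this assumes the suspend trigger offset fits in 16 bits.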
+  uint8_t checkinst2[] = {0x85, 0x00};
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
+
+  if (pc[0] != checkinst2[0] || pc[1] != checkinst2[1]) {
+    // Second instruction is not correct (test eax,[eax]).
+    VLOG(signals) << "Not a suspension point";
+    return false;
+  }
+
+  // The first instruction can be a little further up the instruction stream
+  // due to load hoisting in the compiler.
+  uint8_t* limit = pc - 100;   // Compiler will hoist to a max of ~20 instructions (about 100 bytes).
+  uint8_t* ptr = pc - sizeof(checkinst1);
+  bool found = false;
+  while (ptr > limit) {
+    if (memcmp(ptr, checkinst1, sizeof(checkinst1)) == 0) {
+      found = true;
+      break;
+    }
+    ptr -= 1;
+  }
+
+  if (found) {
+    VLOG(signals) << "suspend check match";
+
+    // We need to arrange for the signal handler to return to the test
+    // suspend entrypoint.  The return address must be the address of the
+    // next instruction (this instruction + 2).  The return address
+    // is placed on the stack at the top of the current frame.
+
+    // Push the return address onto the stack.
+    uint32_t retaddr = reinterpret_cast<uint32_t>(pc + 2);
+    uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+    *next_sp = retaddr;
+    uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+
+    uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
+
+    // Now remove the suspend trigger that caused this fault.
+    Thread::Current()->RemoveSuspendTrigger();
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
+    return true;
+  }
+  VLOG(signals) << "Not a suspend check match, first instruction mismatch";
   return false;
 }
 
-bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
+// The stack overflow check is done using the following instruction:
+// test eax, [esp+ -xxx]
+// where 'xxx' is the size of the overflow area.
+//
+// This is done before any frame is established in the method.  The return
+// address for the previous method is on the stack at ESP.
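+//
+// For example, if the reserved region were 0x2000 bytes (a made-up value;
+// the real one comes from GetStackOverflowReservedBytes(kX86)), the compiler
+// would emit "test eax, [esp + -0x2000]" and the faulting address would be
+// exactly ESP - 0x2000, which is what Action checks for below.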
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uintptr_t sp = static_cast<uintptr_t>(uc->CTX_ESP);
+
+  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+    ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
+
+  Thread* self = Thread::Current();
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  Determine the next available
+  // valid address below the protected region.
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << pregion;
+
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  uc->CTX_EAX = pregion;
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  return true;
 }
 }       // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 24b9e46..68f46ad 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -173,6 +173,21 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, EAX contains the ESP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the ESP is above the protected region.  We need to create a
+    // callee save frame and then move ESP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx                // get current stack pointer
+    mov %eax, %esp                // move ESP to the overflow region.
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index bff8501..c3637ef 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,6 +19,7 @@
 
 #include "asm_support.h"
 
+// Note: these callee save method loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
@@ -35,9 +36,9 @@
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)
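+// The additional 4*8 bytes in each frame are the four FP callee-save
+// registers (XMM12-XMM15) spilled by the callee-save frame setup code.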
 
 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index e1f47ee..7699eaf 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -78,6 +78,18 @@
   gprs_[R9] = nullptr;
   gprs_[R10] = nullptr;
   gprs_[R11] = nullptr;
+  fprs_[XMM0] = nullptr;
+  fprs_[XMM1] = nullptr;
+  fprs_[XMM2] = nullptr;
+  fprs_[XMM3] = nullptr;
+  fprs_[XMM4] = nullptr;
+  fprs_[XMM5] = nullptr;
+  fprs_[XMM6] = nullptr;
+  fprs_[XMM7] = nullptr;
+  fprs_[XMM8] = nullptr;
+  fprs_[XMM9] = nullptr;
+  fprs_[XMM10] = nullptr;
+  fprs_[XMM11] = nullptr;
 }
 
 bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
@@ -102,41 +114,26 @@
   }
 }
 
+extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*);
+
 void X86_64Context::DoLongJump() {
 #if defined(__x86_64__)
-  // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at
-  // the top for the stack pointer that doesn't get popped in a pop-all.
-  volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
+  uintptr_t gprs[kNumberOfCpuRegisters + 1];
+  uintptr_t fprs[kNumberOfFloatRegisters];
+
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
     gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i;
   }
+  for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i;
+  }
+
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
   gprs[kNumberOfCpuRegisters] = rsp;
   *(reinterpret_cast<uintptr_t*>(rsp)) = rip_;
-  __asm__ __volatile__(
-      "movq %0, %%rsp\n\t"  // RSP points to gprs.
-      "popq %%r15\n\t"       // Load all registers except RSP and RIP with values in gprs.
-      "popq %%r14\n\t"
-      "popq %%r13\n\t"
-      "popq %%r12\n\t"
-      "popq %%r11\n\t"
-      "popq %%r10\n\t"
-      "popq %%r9\n\t"
-      "popq %%r8\n\t"
-      "popq %%rdi\n\t"
-      "popq %%rsi\n\t"
-      "popq %%rbp\n\t"
-      "addq $8, %%rsp\n\t"
-      "popq %%rbx\n\t"
-      "popq %%rdx\n\t"
-      "popq %%rcx\n\t"
-      "popq %%rax\n\t"
-      "popq %%rsp\n\t"      // Load stack pointer.
-      "ret\n\t"             // From higher in the stack pop rip.
-      :  // output.
-      : "g"(&gprs[0])  // input.
-      :);  // clobber.
+
+  art_quick_do_long_jump(gprs, fprs);
 #else
   UNIMPLEMENTED(FATAL);
 #endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 609d1c6..35a0cf4 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -27,15 +29,15 @@
                                                   const DexFile::CodeItem* code_item,
                                                   ShadowFrame* shadow_frame, JValue* result);
 extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
-                                           const DexFile::CodeItem* code_item,
-                                           ShadowFrame* shadow_frame, JValue* result);
+                                                   const DexFile::CodeItem* code_item,
+                                                   ShadowFrame* shadow_frame, JValue* result);
 
 // Portable entrypoints.
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 extern "C" void art_quick_check_cast(void*, void*);
 
@@ -129,7 +131,7 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
diff --git a/runtime/arch/x86_64/fault_handler_x86_64.cc b/runtime/arch/x86_64/fault_handler_x86_64.cc
index 233d3c7..88ae7f3 100644
--- a/runtime/arch/x86_64/fault_handler_x86_64.cc
+++ b/runtime/arch/x86_64/fault_handler_x86_64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index d668797..f6736df 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -28,8 +28,8 @@
     PUSH rdx  // Arg.
     PUSH rcx  // Arg.
     // Create space for FPR args, plus padding for alignment
-    subq LITERAL(72), %rsp
-    CFI_ADJUST_CFA_OFFSET(72)
+    subq LITERAL(72 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(72 + 4 * 8)
     // Save FPRs.
     movq %xmm0, 0(%rsp)
     movq %xmm1, 8(%rsp)
@@ -39,6 +39,10 @@
     movq %xmm5, 40(%rsp)
     movq %xmm6, 48(%rsp)
     movq %xmm7, 56(%rsp)
+    movq %xmm12, 64(%rsp)
+    movq %xmm13, 72(%rsp)
+    movq %xmm14, 80(%rsp)
+    movq %xmm15, 88(%rsp)
     // prepare call
     movq %gs:THREAD_SELF_OFFSET, %rdi      // RDI := Thread::Current()
     // call
@@ -52,8 +56,12 @@
     movq 40(%rsp), %xmm5
     movq 48(%rsp), %xmm6
     movq 56(%rsp), %xmm7
-    addq LITERAL(72), %rsp
-    CFI_ADJUST_CFA_OFFSET(-72)
+    movq 64(%rsp), %xmm12
+    movq 72(%rsp), %xmm13
+    movq 80(%rsp), %xmm14
+    movq 88(%rsp), %xmm15
+    addq LITERAL(72 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8)
     POP rcx  // Arg.
     POP rdx  // Arg.
     POP rsi  // Arg.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8fa947c..50b2de4 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -16,6 +16,26 @@
 
 #include "asm_support_x86_64.S"
 
+MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
+    // Create space for ART FP callee-saved registers
+    subq MACRO_LITERAL(4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(4 * 8)
+    movq %xmm12, 0(%rsp)
+    movq %xmm13, 8(%rsp)
+    movq %xmm14, 16(%rsp)
+    movq %xmm15, 24(%rsp)
+END_MACRO
+
+MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
+    // Restore ART FP callee-saved registers
+    movq 0(%rsp), %xmm12
+    movq 8(%rsp), %xmm13
+    movq 16(%rsp), %xmm14
+    movq 24(%rsp), %xmm15
+    addq MACRO_LITERAL(4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
+END_MACRO
+
 // For x86, the CFA is esp+4, the address above the pushed return address on the stack.
 
     /*
@@ -37,16 +57,25 @@
     PUSH r12  // Callee save.
     PUSH rbp  // Callee save.
     PUSH rbx  // Callee save.
+    // Create space for FPR callee saves (xmm12-xmm15).
+    subq LITERAL(4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(4 * 8)
+    // Save FPRs.
+    movq %xmm12, 0(%rsp)
+    movq %xmm13, 8(%rsp)
+    movq %xmm14, 16(%rsp)
+    movq %xmm15, 24(%rsp)
     subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
     CFI_ADJUST_CFA_OFFSET(8)
     // R10 := ArtMethod* for save all callee save frame method.
+    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
@@ -71,24 +100,35 @@
     PUSH r12  // Callee save.
     PUSH rbp  // Callee save.
     PUSH rbx  // Callee save.
-    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
-    CFI_ADJUST_CFA_OFFSET(8)
+    // Create space for the Method* slot and FPR callee saves (xmm12-xmm15).
+    subq LITERAL(8 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 4*8)
+    // Save FPRs.
+    movq %xmm12, 8(%rsp)
+    movq %xmm13, 16(%rsp)
+    movq %xmm14, 24(%rsp)
+    movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
+    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
-    addq MACRO_LITERAL(8), %rsp
-    CFI_ADJUST_CFA_OFFSET(-8)
+    movq 8(%rsp), %xmm12
+    movq 16(%rsp), %xmm13
+    movq 24(%rsp), %xmm14
+    movq 32(%rsp), %xmm15
+    addq LITERAL(8 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
     // TODO: optimize by not restoring callee-saves restored by the ABI
     POP rbx
     POP rbp
@@ -123,9 +163,10 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq MACRO_LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq MACRO_LITERAL(80 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
+    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
@@ -136,12 +177,16 @@
     movq %xmm5, 56(%rsp)
     movq %xmm6, 64(%rsp)
     movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
@@ -157,8 +202,12 @@
     movq 56(%rsp), %xmm5
     movq 64(%rsp), %xmm6
     movq 72(%rsp), %xmm7
-    addq MACRO_LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
+    addq MACRO_LITERAL(80 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
     // Restore callee and GPR args, mixed together to agree with core spills bitmap.
     POP rcx
     POP rdx
@@ -536,6 +585,58 @@
 #endif  // __APPLE__
 END_FUNCTION art_quick_invoke_static_stub
 
+    /*
+     * Long jump stub.
+     * On entry:
+     *   rdi = gprs
+     *   rsi = fprs
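+     *
+     * The gprs array is filled by X86_64Context::DoLongJump in reverse
+     * register order, with one extra trailing slot holding the new RSP, so
+     * the registers can simply be popped in sequence below; the target RIP
+     * has already been stored on the new stack for the final ret.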
+     */
+DEFINE_FUNCTION art_quick_do_long_jump
+#if defined(__APPLE__)
+    int3
+    int3
+#else
+    // Restore FPRs.
+    movq 0(%rsi), %xmm0
+    movq 8(%rsi), %xmm1
+    movq 16(%rsi), %xmm2
+    movq 24(%rsi), %xmm3
+    movq 32(%rsi), %xmm4
+    movq 40(%rsi), %xmm5
+    movq 48(%rsi), %xmm6
+    movq 56(%rsi), %xmm7
+    movq 64(%rsi), %xmm8
+    movq 72(%rsi), %xmm9
+    movq 80(%rsi), %xmm10
+    movq 88(%rsi), %xmm11
+    movq 96(%rsi), %xmm12
+    movq 104(%rsi), %xmm13
+    movq 112(%rsi), %xmm14
+    movq 120(%rsi), %xmm15
+    // Restore GPRs.
+    movq %rdi, %rsp   // RSP points to gprs.
+    // Load all registers except RSP and RIP with values in gprs.
+    popq %r15
+    popq %r14
+    popq %r13
+    popq %r12
+    popq %r11
+    popq %r10
+    popq %r9
+    popq %r8
+    popq %rdi
+    popq %rsi
+    popq %rbp
+    addq LITERAL(8), %rsp   // Skip rsp
+    popq %rbx
+    popq %rdx
+    popq %rcx
+    popq %rax
+    popq %rsp      // Load stack pointer.
+    ret            // From higher in the stack pop rip.
+#endif  // __APPLE__
+END_FUNCTION art_quick_do_long_jump
+
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
@@ -820,13 +921,17 @@
 DEFINE_FUNCTION art_quick_check_cast
     PUSH rdi                          // Save args for exc
     PUSH rsi
+    SETUP_FP_CALLEE_SAVE_FRAME
     call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     testq %rax, %rax
     jz 1f                             // jump forward if not assignable
+    RESTORE_FP_CALLEE_SAVE_FRAME
     addq LITERAL(16), %rsp            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
+
     ret
 1:
+    RESTORE_FP_CALLEE_SAVE_FRAME
     POP rsi                           // Pop arguments
     POP rdi
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
@@ -907,6 +1012,7 @@
     PUSH rdx
     subq LITERAL(8), %rsp        // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
+    SETUP_FP_CALLEE_SAVE_FRAME
 
                                   // "Uncompress" = do nothing, as already zero-extended on load.
     movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
@@ -918,6 +1024,7 @@
     testq %rax, %rax
     jz   .Lthrow_array_store_exception
 
+    RESTORE_FP_CALLEE_SAVE_FRAME
     // Restore arguments.
     addq LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
@@ -934,6 +1041,7 @@
 //  movb %dl, (%rdx, %rdi)
     ret
 .Lthrow_array_store_exception:
+    RESTORE_FP_CALLEE_SAVE_FRAME
     // Restore arguments.
     addq LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
@@ -1012,8 +1120,8 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -1023,14 +1131,18 @@
     movq %xmm5, 56(%rsp)
     movq %xmm6, 64(%rsp)
     movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
     // Store proxy method to bottom of stack.
     movq %rdi, 0(%rsp)
     movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread::Current().
     movq %rsp, %rcx                    // Pass SP.
     call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
     movq %rax, %xmm0                   // Copy return value in case of float returns.
-    addq LITERAL(168), %rsp            // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-168)
+    addq LITERAL(168 + 4*8), %rsp            // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
 
@@ -1156,8 +1268,8 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -1167,6 +1279,10 @@
     movq %xmm5, 56(%rsp)
     movq %xmm6, 64(%rsp)
     movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
     movq %rdi, 0(%rsp)              // Store native ArtMethod* to bottom of stack.
     movq %rsp, %rbp                 // save SP at (old) callee-save frame
     CFI_DEF_CFA_REGISTER(rbp)
@@ -1260,9 +1376,13 @@
     movq 56(%rsp), %xmm5
     movq 64(%rsp), %xmm6
     movq 72(%rsp), %xmm7
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
-    // was 80 bytes
+    // was 80 + 32 bytes
-    addq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    addq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     POP rcx  // Arg.
     POP rdx  // Arg.
@@ -1292,9 +1412,13 @@
     movq 56(%rsp), %xmm5
     movq 64(%rsp), %xmm6
     movq 72(%rsp), %xmm7
-    // was 80 bytes
-    addq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
+    // was 80 + 32 bytes
+    addq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     POP rcx  // Arg.
     POP rdx  // Arg.
@@ -1450,3 +1574,10 @@
 END_FUNCTION art_quick_string_compareto
 
 UNIMPLEMENTED art_quick_memcmp16
+
+DEFINE_FUNCTION art_quick_assignable_from_code
+    SETUP_FP_CALLEE_SAVE_FRAME
+    call PLT_SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_assignable_from_code
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 6183909..53aa212 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -34,6 +34,9 @@
     (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
     (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
     (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+static constexpr uint32_t kX86_64CalleeSaveFpSpills =
+    (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
+    (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
 
 constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kX86_64CalleeSaveRefSpills |
@@ -42,7 +45,8 @@
 }
 
 constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
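+  // The FP callee saves (xmm12-xmm15) are spilled in every frame type; the
+  // FP argument registers only in kRefsAndArgs frames.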
+  return kX86_64CalleeSaveFpSpills |
+      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
index 38f3494..f29c426 100644
--- a/runtime/arch/x86_64/registers_x86_64.cc
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -34,5 +34,14 @@
   return os;
 }
 
+std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) {
+  if (rhs >= XMM0 && rhs <= XMM15) {
+    os << "xmm" << static_cast<int>(rhs);
+  } else {
+    os << "Register[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 086ef44..de348dc 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,6 +22,7 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
+#include "thread-inl.h"
 
 namespace art {
 class CheckWaitTask : public Task {
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index fe5a2ef..fae9271 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -176,6 +176,7 @@
 #endif
 
 #define PURE __attribute__ ((__pure__))
+#define WARN_UNUSED __attribute__((warn_unused_result))
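+// E.g. "bool Init() WARN_UNUSED;" makes the compiler warn when a caller
+// discards the return value.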
 
 template<typename T> void UNUSED(const T&) {}
 
diff --git a/runtime/base/mutex_test.cc b/runtime/base/mutex_test.cc
index ee0b1be..289d3ef 100644
--- a/runtime/base/mutex_test.cc
+++ b/runtime/base/mutex_test.cc
@@ -17,6 +17,7 @@
 #include "mutex.h"
 
 #include "common_runtime_test.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/scoped_flock_test.cc b/runtime/base/scoped_flock_test.cc
index 8fa181a..1fa7a12 100644
--- a/runtime/base/scoped_flock_test.cc
+++ b/runtime/base/scoped_flock_test.cc
@@ -15,9 +15,8 @@
  */
 
 #include "scoped_flock.h"
-#include "common_runtime_test.h"
 
-#include "gtest/gtest.h"
+#include "common_runtime_test.h"
 
 namespace art {
 
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index fefb907..a530594 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -23,6 +23,7 @@
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
+#include "field_helper.h"
 #include "gc/space/space.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
@@ -31,7 +32,6 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "mirror/throwable.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -209,7 +209,7 @@
         // obj will be NULL.  Otherwise, obj should always be non-NULL
         // and valid.
         if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
-          Runtime::Current()->GetHeap()->DumpSpaces();
+          Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
           JniAbortF(function_name_, "field operation on invalid %s: %p",
                     ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
           return;
@@ -248,7 +248,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
     if (o == nullptr || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
       return;
@@ -628,7 +628,7 @@
 
     mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       JniAbortF(function_name_, "%s is an invalid %s: %p (%p)",
                 what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object, obj);
       return false;
@@ -682,7 +682,7 @@
 
     mirror::Array* a = soa_.Decode<mirror::Array*>(java_array);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(a)) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       JniAbortF(function_name_, "jarray is an invalid %s: %p (%p)",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_array)).c_str(), java_array, a);
     } else if (!a->IsArrayInstance()) {
@@ -703,7 +703,7 @@
     }
     mirror::ArtField* f = soa_.DecodeField(fid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
       return nullptr;
     }
@@ -717,7 +717,7 @@
     }
     mirror::ArtMethod* m = soa_.DecodeMethod(mid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m) || !m->IsArtMethod()) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
       return nullptr;
     }
@@ -738,7 +738,7 @@
 
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-      Runtime::Current()->GetHeap()->DumpSpaces();
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       // TODO: when we remove work_around_app_jni_bugs, this should be impossible.
       JniAbortF(function_name_, "native code passing in reference to invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index a40a2e4..25eb3a3 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -24,7 +24,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/iftable.h"
 #include "mirror/object_array.h"
-#include "object_utils.h"
 #include "handle_scope-inl.h"
 
 namespace art {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5180e34..2c11f8b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -42,8 +42,10 @@
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "leb128.h"
+#include "method_helper.h"
 #include "oat.h"
 #include "oat_file.h"
+#include "object_lock.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
@@ -54,8 +56,9 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/proxy.h"
+#include "mirror/reference-inl.h"
 #include "mirror/stack_trace_element.h"
-#include "object_utils.h"
+#include "mirror/string-inl.h"
 #include "os.h"
 #include "runtime.h"
 #include "entrypoints/entrypoint_utils.h"
@@ -207,7 +210,8 @@
   heap->IncrementDisableMovingGC(self);
   StackHandleScope<64> hs(self);  // 64 is picked arbitrarily.
   Handle<mirror::Class> java_lang_Class(hs.NewHandle(down_cast<mirror::Class*>(
-      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass),
+      heap->AllocNonMovableObject<true>(self, nullptr,
+                                        mirror::Class::ClassClassSize(),
                                         VoidFunctor()))));
   CHECK(java_lang_Class.Get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.Get());
@@ -215,45 +219,53 @@
   if (kUseBakerOrBrooksReadBarrier) {
     java_lang_Class->AssertReadBarrierPointer();
   }
-  java_lang_Class->SetClassSize(sizeof(mirror::ClassClass));
+  java_lang_Class->SetClassSize(mirror::Class::ClassClassSize());
   heap->DecrementDisableMovingGC(self);
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
-  Handle<mirror::Class> class_array_class(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> class_array_class(hs.NewHandle(
+     AllocClass(self, java_lang_Class.Get(), mirror::ObjectArray<mirror::Class>::ClassSize())));
   class_array_class->SetComponentType(java_lang_Class.Get());
 
   // java_lang_Object comes next so that object_array_class can be created.
-  Handle<mirror::Class> java_lang_Object(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> java_lang_Object(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::Object::ClassSize())));
   CHECK(java_lang_Object.Get() != NULL);
   // backfill Object as the super class of Class.
   java_lang_Class->SetSuperClass(java_lang_Object.Get());
   java_lang_Object->SetStatus(mirror::Class::kStatusLoaded, self);
 
   // Object[] next to hold class roots.
-  Handle<mirror::Class> object_array_class(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> object_array_class(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::ObjectArray<mirror::Object>::ClassSize())));
   object_array_class->SetComponentType(java_lang_Object.Get());
 
-  // Setup the char class to be used for char[].
-  Handle<mirror::Class> char_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
-                                                           sizeof(mirror::Class))));
+  // Setup the char (primitive) class to be used for char[].
+  Handle<mirror::Class> char_class(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::Class::PrimitiveClassSize())));
 
   // Setup the char[] class to be used for String.
-  Handle<mirror::Class> char_array_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
-                                                                 sizeof(mirror::Class))));
+  Handle<mirror::Class> char_array_class(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(),
+                 mirror::Array::ClassSize())));
   char_array_class->SetComponentType(char_class.Get());
   mirror::CharArray::SetArrayClass(char_array_class.Get());
 
   // Setup String.
-  Handle<mirror::Class> java_lang_String(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
-                                                                 sizeof(mirror::StringClass))));
+  Handle<mirror::Class> java_lang_String(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::String::ClassSize())));
   mirror::String::SetClass(java_lang_String.Get());
-  java_lang_String->SetObjectSize(sizeof(mirror::String));
+  java_lang_String->SetObjectSize(mirror::String::InstanceSize());
   java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
 
+  // Setup Reference.
+  Handle<mirror::Class> java_lang_ref_Reference(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::Reference::ClassSize())));
+  mirror::Reference::SetClass(java_lang_ref_Reference.Get());
+  java_lang_ref_Reference->SetObjectSize(mirror::Reference::InstanceSize());
+  java_lang_ref_Reference->SetStatus(mirror::Class::kStatusResolved, self);
+
   // Create storage for root classes, save away our work so far (requires descriptors).
   class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.Get(),
                                                            kClassRootsMax);
@@ -264,6 +276,7 @@
   SetClassRoot(kObjectArrayClass, object_array_class.Get());
   SetClassRoot(kCharArrayClass, char_array_class.Get());
   SetClassRoot(kJavaLangString, java_lang_String.Get());
+  SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference.Get());
 
   // Setup the primitive type classes.
   SetClassRoot(kPrimitiveBoolean, CreatePrimitiveClass(self, Primitive::kPrimBoolean));
@@ -279,8 +292,8 @@
   array_iftable_ = AllocIfTable(self, 2);
 
   // Create int array type for AllocDexCache (done in AppendToBootClassPath).
-  Handle<mirror::Class> int_array_class(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> int_array_class(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::Array::ClassSize())));
   int_array_class->SetComponentType(GetClassRoot(kPrimitiveInt));
   mirror::IntArray::SetArrayClass(int_array_class.Get());
   SetClassRoot(kIntArrayClass, int_array_class.Get());
@@ -288,44 +301,47 @@
   // now that these are registered, we can use AllocClass() and AllocObjectArray
 
   // Set up DexCache. This cannot be done later since AppendToBootClassPath calls AllocDexCache.
-  Handle<mirror::Class> java_lang_DexCache(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::DexCacheClass))));
+  Handle<mirror::Class> java_lang_DexCache(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::DexCache::ClassSize())));
   SetClassRoot(kJavaLangDexCache, java_lang_DexCache.Get());
-  java_lang_DexCache->SetObjectSize(sizeof(mirror::DexCache));
+  java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
   java_lang_DexCache->SetStatus(mirror::Class::kStatusResolved, self);
 
   // Constructor, Field, Method, and AbstractMethod are necessary so
   // that FindClass can link members.
-  Handle<mirror::Class> java_lang_reflect_ArtField(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::ArtFieldClass))));
+  Handle<mirror::Class> java_lang_reflect_ArtField(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::ArtField::ClassSize())));
   CHECK(java_lang_reflect_ArtField.Get() != NULL);
-  java_lang_reflect_ArtField->SetObjectSize(sizeof(mirror::ArtField));
+  java_lang_reflect_ArtField->SetObjectSize(mirror::ArtField::InstanceSize());
   SetClassRoot(kJavaLangReflectArtField, java_lang_reflect_ArtField.Get());
   java_lang_reflect_ArtField->SetStatus(mirror::Class::kStatusResolved, self);
   mirror::ArtField::SetClass(java_lang_reflect_ArtField.Get());
 
-  Handle<mirror::Class> java_lang_reflect_ArtMethod(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::ArtMethodClass))));
+  Handle<mirror::Class> java_lang_reflect_ArtMethod(hs.NewHandle(
+    AllocClass(self, java_lang_Class.Get(), mirror::ArtMethod::ClassSize())));
   CHECK(java_lang_reflect_ArtMethod.Get() != NULL);
-  java_lang_reflect_ArtMethod->SetObjectSize(sizeof(mirror::ArtMethod));
+  java_lang_reflect_ArtMethod->SetObjectSize(mirror::ArtMethod::InstanceSize());
   SetClassRoot(kJavaLangReflectArtMethod, java_lang_reflect_ArtMethod.Get());
   java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusResolved, self);
 
   mirror::ArtMethod::SetClass(java_lang_reflect_ArtMethod.Get());
 
   // Set up array classes for string, field, method
-  Handle<mirror::Class> object_array_string(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> object_array_string(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(),
+                 mirror::ObjectArray<mirror::String>::ClassSize())));
   object_array_string->SetComponentType(java_lang_String.Get());
   SetClassRoot(kJavaLangStringArrayClass, object_array_string.Get());
 
-  Handle<mirror::Class> object_array_art_method(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> object_array_art_method(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(),
+                 mirror::ObjectArray<mirror::ArtMethod>::ClassSize())));
   object_array_art_method->SetComponentType(java_lang_reflect_ArtMethod.Get());
   SetClassRoot(kJavaLangReflectArtMethodArrayClass, object_array_art_method.Get());
 
-  Handle<mirror::Class> object_array_art_field(
-      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> object_array_art_field(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(),
+                 mirror::ObjectArray<mirror::ArtField>::ClassSize())));
   object_array_art_field->SetComponentType(java_lang_reflect_ArtField.Get());
   SetClassRoot(kJavaLangReflectArtFieldArrayClass, object_array_art_field.Get());
 
@@ -359,16 +375,19 @@
   java_lang_Object->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* Object_class = FindSystemClass(self, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object.Get(), Object_class);
-  CHECK_EQ(java_lang_Object->GetObjectSize(), sizeof(mirror::Object));
+  CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
   java_lang_String->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* String_class = FindSystemClass(self, "Ljava/lang/String;");
-  CHECK_EQ(java_lang_String.Get(), String_class);
-  CHECK_EQ(java_lang_String->GetObjectSize(), sizeof(mirror::String));
+  std::ostringstream os1, os2;
+  java_lang_String->DumpClass(os1, mirror::Class::kDumpClassFullDetail);
+  String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
+  CHECK_EQ(java_lang_String.Get(), String_class) << os1.str() << "\n\n" << os2.str();
+  CHECK_EQ(java_lang_String->GetObjectSize(), mirror::String::InstanceSize());
   java_lang_DexCache->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* DexCache_class = FindSystemClass(self, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_String.Get(), String_class);
   CHECK_EQ(java_lang_DexCache.Get(), DexCache_class);
-  CHECK_EQ(java_lang_DexCache->GetObjectSize(), sizeof(mirror::DexCache));
+  CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
   SetClassRoot(kBooleanArrayClass, FindSystemClass(self, "[Z"));
@@ -452,8 +471,12 @@
   SetClassRoot(kJavaLangReflectProxy, java_lang_reflect_Proxy);
 
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
-  mirror::Class* java_lang_ref_Reference = FindSystemClass(self, "Ljava/lang/ref/Reference;");
-  SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference);
+  // finish initializing Reference class
+  java_lang_ref_Reference->SetStatus(mirror::Class::kStatusNotReady, self);
+  mirror::Class* Reference_class = FindSystemClass(self, "Ljava/lang/ref/Reference;");
+  CHECK_EQ(java_lang_ref_Reference.Get(), Reference_class);
+  CHECK_EQ(java_lang_ref_Reference->GetObjectSize(), mirror::Reference::InstanceSize());
+  CHECK_EQ(java_lang_ref_Reference->GetClassSize(), mirror::Reference::ClassSize());
   mirror::Class* java_lang_ref_FinalizerReference =
       FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
   java_lang_ref_FinalizerReference->SetAccessFlags(
@@ -476,7 +499,7 @@
 
   // Setup the ClassLoader, verifying the object_size_.
   mirror::Class* java_lang_ClassLoader = FindSystemClass(self, "Ljava/lang/ClassLoader;");
-  CHECK_EQ(java_lang_ClassLoader->GetObjectSize(), sizeof(mirror::ClassLoader));
+  CHECK_EQ(java_lang_ClassLoader->GetObjectSize(), mirror::ClassLoader::InstanceSize());
   SetClassRoot(kJavaLangClassLoader, java_lang_ClassLoader);
 
   // Set up java.lang.Throwable, java.lang.ClassNotFoundException, and
@@ -802,9 +825,20 @@
       }
     } else {
       // TODO: What to lock here?
+      bool obsolete_file_cleanup_failed;
       open_oat_file.reset(FindOatFileContainingDexFileFromDexLocation(dex_location,
                                                                       dex_location_checksum_pointer,
-                                                                      kRuntimeISA, error_msgs));
+                                                                      kRuntimeISA, error_msgs,
+                                                                      &obsolete_file_cleanup_failed));
+      // There's no point in going forward and eventually trying to regenerate the
+      // file if we couldn't remove the obsolete one. Most likely we will fail
+      // with the same error when trying to write the new file.
+      // In case the cleanup failure is due to permission issues it's *mandatory*
+      // to stop, to avoid regenerating under the wrong user.
+      // TODO: should we maybe do this only when we get permission issues? (i.e. EACCES).
+      if (obsolete_file_cleanup_failed) {
+        return false;
+      }
     }
     needs_registering = true;
   }
@@ -1062,7 +1096,9 @@
     const char* dex_location,
     const uint32_t* const dex_location_checksum,
     InstructionSet isa,
-    std::vector<std::string>* error_msgs) {
+    std::vector<std::string>* error_msgs,
+    bool* obsolete_file_cleanup_failed) {
+  *obsolete_file_cleanup_failed = false;
   // Look for an existing file next to dex. for example, for
   // /foo/bar/baz.jar, look for /foo/bar/<isa>/baz.odex.
   std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
@@ -1089,9 +1125,18 @@
   if (oat_file != nullptr) {
     return oat_file;
   }
+
   if (!open_failed && TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) {
-    PLOG(FATAL) << "Failed to remove obsolete oat file from " << cache_location;
+    std::string error_msg = StringPrintf("Failed to remove obsolete file from %s when searching "
+                                         "for dex file %s: %s",
+                                         cache_location.c_str(), dex_location, strerror(errno));
+    error_msgs->push_back(error_msg);
+    VLOG(class_linker) << error_msg;
+    // Let the caller know that we couldn't remove the obsolete file.
+    // This is a good indication that further writes may fail as well.
+    *obsolete_file_cleanup_failed = true;
   }
+
   std::string compound_msg = StringPrintf("Failed to open oat file from %s (error '%s') or %s "
                                           "(error '%s').", odex_filename.c_str(), error_msg.c_str(),
                                           cache_location.c_str(), cache_error_msg.c_str());
@@ -1159,7 +1204,9 @@
   OatFile& oat_file = GetImageOatFile(space);
   CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
   CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
-  CHECK(oat_file.GetOatHeader().GetImageFileLocation().empty());
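+  // The oat file that backs the boot image is not itself relative to another image, so the
+  // image-location entry in its key-value store must be absent or empty.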
+  const char* image_file_location = oat_file.GetOatHeader().
+      GetStoreValueByKey(OatHeader::kImageLocationKey);
+  CHECK(image_file_location == nullptr || *image_file_location == 0);
   portable_resolution_trampoline_ = oat_file.GetOatHeader().GetPortableResolutionTrampoline();
   quick_resolution_trampoline_ = oat_file.GetOatHeader().GetQuickResolutionTrampoline();
   portable_imt_conflict_trampoline_ = oat_file.GetOatHeader().GetPortableImtConflictTrampoline();
@@ -1220,6 +1267,7 @@
   array_iftable_ = GetClassRoot(kObjectArrayClass)->GetIfTable();
   DCHECK(array_iftable_ == GetClassRoot(kBooleanArrayClass)->GetIfTable());
   // String class root was set above
+  mirror::Reference::SetClass(GetClassRoot(kJavaLangRefReference));
   mirror::ArtField::SetClass(GetClassRoot(kJavaLangReflectArtField));
   mirror::BooleanArray::SetArrayClass(GetClassRoot(kBooleanArrayClass));
   mirror::ByteArray::SetArrayClass(GetClassRoot(kByteArrayClass));
@@ -1343,6 +1391,7 @@
 ClassLinker::~ClassLinker() {
   mirror::Class::ResetClass();
   mirror::String::ResetClass();
+  mirror::Reference::ResetClass();
   mirror::ArtField::ResetClass();
   mirror::ArtMethod::ResetClass();
   mirror::BooleanArray::ResetArrayClass();
@@ -1400,36 +1449,11 @@
   return dex_cache.Get();
 }
 
-// Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
-// fence.
-class InitializeClassVisitor {
- public:
-  explicit InitializeClassVisitor(uint32_t class_size) : class_size_(class_size) {
-  }
-
-  void operator()(mirror::Object* obj, size_t usable_size) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LE(class_size_, usable_size);
-    // Avoid AsClass as object is not yet in live bitmap or allocation stack.
-    mirror::Class* klass = down_cast<mirror::Class*>(obj);
-    // DCHECK(klass->IsClass());
-    klass->SetClassSize(class_size_);
-    klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
-    klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
-    klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
-  }
-
- private:
-  const uint32_t class_size_;
-
-  DISALLOW_COPY_AND_ASSIGN(InitializeClassVisitor);
-};
-
 mirror::Class* ClassLinker::AllocClass(Thread* self, mirror::Class* java_lang_Class,
                                        uint32_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  InitializeClassVisitor visitor(class_size);
+  mirror::Class::InitializeClassVisitor visitor(class_size);
   mirror::Object* k = kMovingClasses ?
       heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
       heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
@@ -1460,9 +1484,33 @@
       self, GetClassRoot(kJavaLangStackTraceElementArrayClass), length);
 }
 
-static mirror::Class* EnsureResolved(Thread* self, mirror::Class* klass)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+mirror::Class* ClassLinker::EnsureResolved(Thread* self, const char* descriptor,
+                                           mirror::Class* klass) {
   DCHECK(klass != NULL);
+
+  // For temporary classes we must wait for them to be retired.
+  if (init_done_ && klass->IsTemp()) {
+    CHECK(!klass->IsResolved());
+    if (klass->IsErroneous()) {
+      ThrowEarlierClassFailure(klass);
+      return nullptr;
+    }
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(klass));
+    ObjectLock<mirror::Class> lock(self, h_class);
+    // Loop and wait for the resolving thread to retire this class.
+    while (!h_class->IsRetired() && !h_class->IsErroneous()) {
+      lock.WaitIgnoringInterrupts();
+    }
+    if (h_class->IsErroneous()) {
+      ThrowEarlierClassFailure(h_class.Get());
+      return nullptr;
+    }
+    CHECK(h_class->IsRetired());
+    // Get the updated class from class table.
+    klass = LookupClass(descriptor, h_class.Get()->GetClassLoader());
+  }
+
   // Wait for the class if it has not already been linked.
   if (!klass->IsResolved() && !klass->IsErroneous()) {
     StackHandleScope<1> hs(self);
@@ -1479,6 +1527,7 @@
       lock.WaitIgnoringInterrupts();
     }
   }
+
   if (klass->IsErroneous()) {
     ThrowEarlierClassFailure(klass);
     return nullptr;
@@ -1502,7 +1551,7 @@
   // Find the class in the loaded classes table.
   mirror::Class* klass = LookupClass(descriptor, class_loader.Get());
   if (klass != NULL) {
-    return EnsureResolved(self, klass);
+    return EnsureResolved(self, descriptor, klass);
   }
   // Class is not yet loaded.
   if (descriptor[0] == '[') {
@@ -1576,8 +1625,10 @@
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
   Thread* self = Thread::Current();
-  StackHandleScope<2> hs(self);
+  StackHandleScope<3> hs(self);
   auto klass = hs.NewHandle<mirror::Class>(nullptr);
+  bool should_allocate = false;
+
   // Load the class from the dex file.
   if (UNLIKELY(!init_done_)) {
     // finish up init of hand crafted class_roots_
@@ -1587,6 +1638,8 @@
       klass.Assign(GetClassRoot(kJavaLangClass));
     } else if (strcmp(descriptor, "Ljava/lang/String;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangString));
+    } else if (strcmp(descriptor, "Ljava/lang/ref/Reference;") == 0) {
+      klass.Assign(GetClassRoot(kJavaLangRefReference));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangDexCache));
     } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtField;") == 0) {
@@ -1594,10 +1647,18 @@
     } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtMethod;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangReflectArtMethod));
     } else {
-      klass.Assign(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
+      should_allocate = true;
     }
   } else {
-    klass.Assign(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
+    should_allocate = true;
+  }
+
+  if (should_allocate) {
+    // Allocate a class with the status of not ready.
+    // An interface object gets the right size here; a regular class will figure out the
+    // right size later and be replaced with a correctly sized copy once it becomes resolved.
+    klass.Assign(AllocClass(self, SizeOfClassWithoutEmbeddedTables(dex_file, dex_class_def)));
   }
   if (UNLIKELY(klass.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // Expect an OOME.
@@ -1612,13 +1673,15 @@
   }
   ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
+
   // Add the newly loaded class to the loaded classes table.
   mirror::Class* existing = InsertClass(descriptor, klass.Get(), Hash(descriptor));
   if (existing != NULL) {
     // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
     // this thread to block.
-    return EnsureResolved(self, existing);
+    return EnsureResolved(self, descriptor, existing);
   }
+
   // Finish loading (if necessary) by finding parents
   CHECK(!klass->IsLoaded());
   if (!LoadSuperAndInterfaces(klass, dex_file)) {
@@ -1631,12 +1694,17 @@
   CHECK(!klass->IsResolved());
   // TODO: Use fast jobjects?
   auto interfaces = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
-  if (!LinkClass(self, klass, interfaces)) {
+
+  mirror::Class* new_class = nullptr;
+  if (!LinkClass(self, descriptor, klass, interfaces, &new_class)) {
     // Linking failed.
     klass->SetStatus(mirror::Class::kStatusError, self);
     return NULL;
   }
-  CHECK(klass->IsResolved());
+  CHECK(new_class != nullptr) << descriptor;
+  CHECK(new_class->IsResolved()) << descriptor;
+
+  Handle<mirror::Class> new_class_h(hs.NewHandle(new_class));
 
   /*
    * We send CLASS_PREPARE events to the debugger from here.  The
@@ -1649,14 +1717,13 @@
    * The class has been prepared and resolved but possibly not yet verified
    * at this point.
    */
-  Dbg::PostClassPrepare(klass.Get());
+  Dbg::PostClassPrepare(new_class_h.Get());
 
-  return klass.Get();
+  return new_class_h.Get();
 }
 
-// Precomputes size that will be needed for Class, matching LinkStaticFields
-uint32_t ClassLinker::SizeOfClass(const DexFile& dex_file,
-                                const DexFile::ClassDef& dex_class_def) {
+uint32_t ClassLinker::SizeOfClassWithoutEmbeddedTables(const DexFile& dex_file,
+                                                       const DexFile::ClassDef& dex_class_def) {
   const byte* class_data = dex_file.GetClassData(dex_class_def);
   size_t num_ref = 0;
   size_t num_32 = 0;
@@ -1675,24 +1742,7 @@
       }
     }
   }
-  // start with generic class data
-  uint32_t size = sizeof(mirror::Class);
-  // follow with reference fields which must be contiguous at start
-  size += (num_ref * sizeof(uint32_t));
-  // if there are 64-bit fields to add, make sure they are aligned
-  if (num_64 != 0 && size != RoundUp(size, 8)) {  // for 64-bit alignment
-    if (num_32 != 0) {
-      // use an available 32-bit field for padding
-      num_32--;
-    }
-    size += sizeof(uint32_t);  // either way, we are adding a word
-    DCHECK_EQ(size, RoundUp(size, 8));
-  }
-  // tack on any 64-bit fields now that alignment is assured
-  size += (num_64 * sizeof(uint64_t));
-  // tack on any remaining 32-bit fields
-  size += (num_32 * sizeof(uint32_t));
-  return size;
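+  // Let ComputeClassSize do the alignment arithmetic; no embedded IMT or vtable is included
+  // at this point.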
+  return mirror::Class::ComputeClassSize(false, 0, num_32, num_64, num_ref);
 }
 
 OatFile::OatClass ClassLinker::GetOatClass(const DexFile& dex_file, uint16_t class_def_idx) {
@@ -2306,7 +2356,7 @@
 }
 
 mirror::Class* ClassLinker::CreatePrimitiveClass(Thread* self, Primitive::Type type) {
-  mirror::Class* klass = AllocClass(self, sizeof(mirror::Class));
+  mirror::Class* klass = AllocClass(self, mirror::Class::PrimitiveClassSize());
   if (UNLIKELY(klass == NULL)) {
     return NULL;
   }
@@ -2411,7 +2461,7 @@
     }
   }
   if (new_class.Get() == nullptr) {
-    new_class.Assign(AllocClass(self, sizeof(mirror::Class)));
+    new_class.Assign(AllocClass(self, mirror::Array::ClassSize()));
     if (new_class.Get() == nullptr) {
       return nullptr;
     }
@@ -2424,6 +2474,8 @@
   new_class->SetVTable(java_lang_Object->GetVTable());
   new_class->SetPrimitiveType(Primitive::kPrimNot);
   new_class->SetClassLoader(component_type->GetClassLoader());
+  new_class->SetStatus(mirror::Class::kStatusLoaded, self);
+  new_class->PopulateEmbeddedImtAndVTable();
   new_class->SetStatus(mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
@@ -2517,7 +2569,8 @@
   if (existing != NULL) {
     return existing;
   }
-  if (kIsDebugBuild && klass->GetClassLoader() == NULL && dex_cache_image_class_lookup_required_) {
+  if (kIsDebugBuild && !klass->IsTemp() && klass->GetClassLoader() == NULL &&
+      dex_cache_image_class_lookup_required_) {
     // Check a class loaded with the system class loader matches one in the image if the class
     // is in the image.
     existing = LookupClassFromImage(descriptor);
@@ -2533,6 +2586,50 @@
   return NULL;
 }
 
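+// Replaces the temporary class in class_table_ with the final, correctly sized class and
+// returns the class that was replaced. Proxy classes are never inserted as temporaries, so
+// nullptr is returned for them.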
+mirror::Class* ClassLinker::UpdateClass(const char* descriptor, mirror::Class* klass,
+                                        size_t hash) {
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  mirror::Class* existing =
+      LookupClassFromTableLocked(descriptor, klass->GetClassLoader(), hash);
+
+  if (existing == nullptr) {
+    CHECK(klass->IsProxyClass());
+    return nullptr;
+  }
+
+  CHECK_NE(existing, klass) << descriptor;
+  CHECK(!existing->IsResolved()) << descriptor;
+  CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusResolving) << descriptor;
+
+  for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
+       it != end && it->first == hash; ++it) {
+    mirror::Class* entry = it->second;
+    if (entry == existing) {
+      class_table_.erase(it);
+      break;
+    }
+  }
+
+  CHECK(!klass->IsTemp()) << descriptor;
+  if (kIsDebugBuild && klass->GetClassLoader() == nullptr &&
+      dex_cache_image_class_lookup_required_) {
+    // Check a class loaded with the system class loader matches one in the image if the class
+    // is in the image.
+    existing = LookupClassFromImage(descriptor);
+    if (existing != nullptr) {
+      CHECK(klass == existing) << descriptor;
+    }
+  }
+  VerifyObject(klass);
+
+  class_table_.insert(std::make_pair(hash, klass));
+  if (log_new_class_table_roots_) {
+    new_class_roots_.push_back(std::make_pair(hash, klass));
+  }
+
+  return existing;
+}
+
 bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
@@ -2952,8 +3049,8 @@
                                              jobjectArray methods, jobjectArray throws) {
   Thread* self = soa.Self();
   StackHandleScope<8> hs(self);
-  Handle<mirror::Class> klass(hs.NewHandle(AllocClass(self, GetClassRoot(kJavaLangClass),
-                                                      sizeof(mirror::SynthesizedProxyClass))));
+  Handle<mirror::Class> klass(hs.NewHandle(
+      AllocClass(self, GetClassRoot(kJavaLangClass), sizeof(mirror::Class))));
   if (klass.Get() == NULL) {
     CHECK(self->IsExceptionPending());  // OOME.
     return NULL;
@@ -3044,20 +3141,31 @@
   klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
   self->AssertNoPendingException();
 
+  std::string descriptor(GetDescriptorForProxy(klass.Get()));
+  mirror::Class* new_class = nullptr;
   {
-    ObjectLock<mirror::Class> lock(self, klass);  // Must hold lock on object when resolved.
+    ObjectLock<mirror::Class> resolution_lock(self, klass);  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
-    Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
+    Handle<mirror::ObjectArray<mirror::Class> > h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
-    if (!LinkClass(self, klass, h_interfaces)) {
+    if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, &new_class)) {
       klass->SetStatus(mirror::Class::kStatusError, self);
       return nullptr;
     }
+  }
 
-    interfaces_sfield->SetObject<false>(
-        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    throws_sfield->SetObject<false>(
-        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
+  CHECK(klass->IsRetired());
+  CHECK_NE(klass.Get(), new_class);
+  klass.Assign(new_class);
+
+  CHECK_EQ(interfaces_sfield->GetDeclaringClass(), new_class);
+  interfaces_sfield->SetObject<false>(
+      klass.Get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+  CHECK_EQ(throws_sfield->GetDeclaringClass(), new_class);
+  throws_sfield->SetObject<false>(
+      klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+
+  {
+    // Lock on klass is released. Lock new class object.
+    ObjectLock<mirror::Class> initialization_lock(self, klass);
     klass->SetStatus(mirror::Class::kStatusInitialized, self);
   }
 
@@ -3083,14 +3191,11 @@
                                                decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
 
-    mirror::SynthesizedProxyClass* synth_proxy_class =
-        down_cast<mirror::SynthesizedProxyClass*>(klass.Get());
-    CHECK_EQ(synth_proxy_class->GetInterfaces(),
+    CHECK_EQ(klass.Get()->GetInterfaces(),
              soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    CHECK_EQ(synth_proxy_class->GetThrows(),
+    CHECK_EQ(klass.Get()->GetThrows(),
              soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
   }
-  std::string descriptor(GetDescriptorForProxy(klass.Get()));
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), Hash(descriptor.c_str()));
   CHECK(existing == nullptr);
   return klass.Get();
@@ -3524,9 +3629,49 @@
   }
 }
 
-bool ClassLinker::LinkClass(Thread* self, Handle<mirror::Class> klass,
-                            Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
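+// Re-points the declaring class of any field or method that was copied from the retired
+// temporary class to the new, correctly sized class.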
+void ClassLinker::FixupTemporaryDeclaringClass(mirror::Class* temp_class, mirror::Class* new_class) {
+  mirror::ObjectArray<mirror::ArtField>* fields = new_class->GetIFields();
+  if (fields != nullptr) {
+    for (int index = 0; index < fields->GetLength(); index++) {
+      if (fields->Get(index)->GetDeclaringClass() == temp_class) {
+        fields->Get(index)->SetDeclaringClass(new_class);
+      }
+    }
+  }
+
+  fields = new_class->GetSFields();
+  if (fields != nullptr) {
+    for (int index = 0; index < fields->GetLength(); index++) {
+      if (fields->Get(index)->GetDeclaringClass() == temp_class) {
+        fields->Get(index)->SetDeclaringClass(new_class);
+      }
+    }
+  }
+
+  mirror::ObjectArray<mirror::ArtMethod>* methods = new_class->GetDirectMethods();
+  if (methods != nullptr) {
+    for (int index = 0; index < methods->GetLength(); index++) {
+      if (methods->Get(index)->GetDeclaringClass() == temp_class) {
+        methods->Get(index)->SetDeclaringClass(new_class);
+      }
+    }
+  }
+
+  methods = new_class->GetVirtualMethods();
+  if (methods != nullptr) {
+    for (int index = 0; index < methods->GetLength(); index++) {
+      if (methods->Get(index)->GetDeclaringClass() == temp_class) {
+        methods->Get(index)->SetDeclaringClass(new_class);
+      }
+    }
+  }
+}
+
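+// Links the class. On success *new_class holds the resolved class: klass itself when no
+// retirement was needed, otherwise the correctly sized copy that replaced the temporary klass.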
+bool ClassLinker::LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
+                            Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                            mirror::Class** new_class) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
+
   if (!LinkSuperClass(klass)) {
     return false;
   }
@@ -3536,13 +3681,60 @@
   if (!LinkInstanceFields(klass)) {
     return false;
   }
-  if (!LinkStaticFields(klass)) {
+  size_t class_size;
+  if (!LinkStaticFields(klass, &class_size)) {
     return false;
   }
   CreateReferenceInstanceOffsets(klass);
   CreateReferenceStaticOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
-  klass->SetStatus(mirror::Class::kStatusResolved, self);
+
+  if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
+    // We don't need to retire this class, as it either has no embedded tables or was created
+    // at the correct size during class linker initialization.
+    CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
+
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      klass->PopulateEmbeddedImtAndVTable();
+    }
+
+    // This will notify waiters on klass that saw the not yet resolved
+    // class in the class_table_ during EnsureResolved.
+    klass->SetStatus(mirror::Class::kStatusResolved, self);
+    *new_class = klass.Get();
+  } else {
+    CHECK(!klass->IsResolved());
+    // Retire the temporary class and create the correctly sized resolved class.
+    *new_class = klass->CopyOf(self, class_size);
+    if (UNLIKELY(*new_class == NULL)) {
+      CHECK(self->IsExceptionPending());  // Expect an OOME.
+      klass->SetStatus(mirror::Class::kStatusError, self);
+      return false;
+    }
+
+    CHECK_EQ((*new_class)->GetClassSize(), class_size);
+    StackHandleScope<1> hs(self);
+    auto new_class_h = hs.NewHandleWrapper<mirror::Class>(new_class);
+    ObjectLock<mirror::Class> lock(self, new_class_h);
+
+    FixupTemporaryDeclaringClass(klass.Get(), new_class_h.Get());
+
+    mirror::Class* existing = UpdateClass(descriptor, new_class_h.Get(), Hash(descriptor));
+    CHECK(existing == NULL || existing == klass.Get());
+
+    // This will notify waiters on temp class that saw the not yet resolved class in the
+    // class_table_ during EnsureResolved.
+    klass->SetStatus(mirror::Class::kStatusRetired, self);
+
+    CHECK_EQ(new_class_h->GetStatus(), mirror::Class::kStatusResolving);
+    // This will notify waiters on new_class that saw the not yet resolved
+    // class in the class_table_ during EnsureResolved.
+    new_class_h->SetStatus(mirror::Class::kStatusResolved, self);
+
+    // Only embedded imt should be used from this point.
+    new_class_h->SetImTable(NULL);
+    // TODO: remove vtable and only use embedded vtable.
+  }
   return true;
 }
 
@@ -3563,6 +3755,7 @@
                               PrettyDescriptor(klass.Get()).c_str());
       return false;
     }
+    CHECK(super_class->IsResolved());
     klass->SetSuperClass(super_class);
   }
   const DexFile::TypeList* interfaces = dex_file.GetInterfacesList(class_def);
@@ -3876,7 +4069,7 @@
   // Allocate imtable
   bool imtable_changed = false;
   Handle<mirror::ObjectArray<mirror::ArtMethod>> imtable(
-      hs.NewHandle(AllocArtMethodArray(self, kImtSize)));
+      hs.NewHandle(AllocArtMethodArray(self, mirror::Class::kImtSize)));
   if (UNLIKELY(imtable.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return false;
@@ -3923,7 +4116,7 @@
             }
             method_array->Set<false>(j, vtable_method);
             // Place method in imt if entry is empty, place conflict otherwise.
-            uint32_t imt_index = interface_method->GetDexMethodIndex() % kImtSize;
+            uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
             if (imtable->Get(imt_index) == NULL) {
               imtable->Set<false>(imt_index, vtable_method);
               imtable_changed = true;
@@ -3961,7 +4154,7 @@
   if (imtable_changed) {
     // Fill in empty entries in interface method table with conflict.
     mirror::ArtMethod* imt_conflict_method = runtime->GetImtConflictMethod();
-    for (size_t i = 0; i < kImtSize; i++) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; i++) {
       if (imtable->Get(i) == NULL) {
         imtable->Set<false>(i, imt_conflict_method);
       }
@@ -4018,15 +4211,12 @@
 
 bool ClassLinker::LinkInstanceFields(Handle<mirror::Class> klass) {
   CHECK(klass.Get() != NULL);
-  return LinkFields(klass, false);
+  return LinkFields(klass, false, nullptr);
 }
 
-bool ClassLinker::LinkStaticFields(Handle<mirror::Class> klass) {
+bool ClassLinker::LinkStaticFields(Handle<mirror::Class> klass, size_t* class_size) {
   CHECK(klass.Get() != NULL);
-  size_t allocated_class_size = klass->GetClassSize();
-  bool success = LinkFields(klass, true);
-  CHECK_EQ(allocated_class_size, klass->GetClassSize());
-  return success;
+  return LinkFields(klass, true, class_size);
 }
 
 struct LinkFieldsComparator {
@@ -4056,19 +4246,23 @@
   }
 };
 
-bool ClassLinker::LinkFields(Handle<mirror::Class> klass, bool is_static) {
+bool ClassLinker::LinkFields(Handle<mirror::Class> klass, bool is_static, size_t* class_size) {
   size_t num_fields =
       is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
 
   mirror::ObjectArray<mirror::ArtField>* fields =
       is_static ? klass->GetSFields() : klass->GetIFields();
 
-  // Initialize size and field_offset
-  size_t size;
+  // Initialize field_offset
   MemberOffset field_offset(0);
   if (is_static) {
-    size = klass->GetClassSize();
-    field_offset = mirror::Class::FieldsOffset();
+    uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      // Static fields come after the embedded tables.
+      base = mirror::Class::ComputeClassSize(true, klass->GetVTableDuringLinking()->GetLength(),
+                                             0, 0, 0);
+    }
+    field_offset = MemberOffset(base);
   } else {
     mirror::Class* super_class = klass->GetSuperClass();
     if (super_class != NULL) {
@@ -4076,7 +4270,6 @@
           << PrettyClass(klass.Get()) << " " << PrettyClass(super_class);
       field_offset = MemberOffset(super_class->GetObjectSize());
     }
-    size = field_offset.Uint32Value();
   }
 
   CHECK_EQ(num_fields == 0, fields == NULL) << PrettyClass(klass.Get());
@@ -4189,11 +4382,12 @@
       DCHECK_EQ(num_fields, num_reference_fields) << PrettyClass(klass.Get());
     }
   }
-  size = field_offset.Uint32Value();
+
+  size_t size = field_offset.Uint32Value();
   // Update klass
   if (is_static) {
     klass->SetNumReferenceStaticFields(num_reference_fields);
-    klass->SetClassSize(size);
+    *class_size = size;
   } else {
     klass->SetNumReferenceInstanceFields(num_reference_fields);
     if (!klass->IsVariableSize()) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 60dad7b..c17f88d 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -31,6 +31,7 @@
 #include "read_barrier.h"
 
 namespace art {
+
 namespace gc {
 namespace space {
   class ImageSpace;
@@ -56,11 +57,6 @@
 
 class ClassLinker {
  public:
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kImtSize = 64;
-
   explicit ClassLinker(InternTable* intern_table);
   ~ClassLinker();
 
@@ -385,6 +381,14 @@
   // Special code to allocate an art method, use this instead of class->AllocObject.
   mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
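+  // Returns the class roots array, reading the root through a read barrier.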
+  mirror::ObjectArray<mirror::Class>* GetClassRoots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ObjectArray<mirror::Class>* class_roots =
+        ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Class>, kWithReadBarrier>(
+            &class_roots_);
+    DCHECK(class_roots != NULL);
+    return class_roots;
+  }
+
  private:
   const OatFile::OatMethod GetOatMethodFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -427,8 +431,10 @@
                          mirror::Class* c, SafeMap<uint32_t, mirror::ArtField*>& field_map)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t SizeOfClass(const DexFile& dex_file,
-                     const DexFile::ClassDef& dex_class_def);
+  // Precomputes the size needed for a Class; in the case of a non-temporary class this size
+  // must be sufficient to hold all static fields.
+  uint32_t SizeOfClassWithoutEmbeddedTables(const DexFile& dex_file,
+                                            const DexFile::ClassDef& dex_class_def);
 
   void LoadClass(const DexFile& dex_file,
                  const DexFile::ClassDef& dex_class_def,
@@ -481,8 +487,9 @@
                                                      mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkClass(Thread* self, Handle<mirror::Class> klass,
-                 Handle<mirror::ObjectArray<mirror::Class>> interfaces)
+  bool LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
+                 Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                 mirror::Class** new_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkSuperClass(Handle<mirror::Class> klass)
@@ -502,17 +509,16 @@
                             Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkStaticFields(Handle<mirror::Class> klass)
+  bool LinkStaticFields(Handle<mirror::Class> klass, size_t* class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool LinkInstanceFields(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool LinkFields(Handle<mirror::Class> klass, bool is_static)
+  bool LinkFields(Handle<mirror::Class> klass, bool is_static, size_t* class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void LinkCode(Handle<mirror::ArtMethod> method, const OatFile::OatClass* oat_class,
                 const DexFile& dex_file, uint32_t dex_method_index, uint32_t method_index)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-
   void CreateReferenceInstanceOffsets(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void CreateReferenceStaticOffsets(Handle<mirror::Class> klass)
@@ -565,7 +571,8 @@
   const OatFile* FindOatFileContainingDexFileFromDexLocation(const char* location,
                                                              const uint32_t* const location_checksum,
                                                              InstructionSet isa,
-                                                             std::vector<std::string>* error_msgs)
+                                                             std::vector<std::string>* error_msgs,
+                                                             bool* obsolete_file_cleanup_failed)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
   // Find and verify an oat file with the given dex file. Will return nullptr when the oat file
@@ -612,11 +619,27 @@
                                             size_t hash)
       SHARED_LOCKS_REQUIRED(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
+  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash)
+      LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void MoveImageClassesToClassTable() LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::Class* LookupClassFromImage(const char* descriptor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // EnsureResolved is called to make sure that a class in the class_table_ has been resolved
+  // before returning it to the caller. Its the responsibility of the thread that placed the class
+  // in the table to make it resolved. The thread doing resolution must notify on the class' lock
+  // when resolution has occurred. This happens in mirror::Class::SetStatus. As resolution may
+  // retire a class, the version of the class in the table is returned and this may differ from
+  // the class passed in.
+  mirror::Class* EnsureResolved(Thread* self, const char* descriptor, mirror::Class* klass)
+      WARN_UNUSED SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void FixupTemporaryDeclaringClass(mirror::Class* temp_class, mirror::Class* new_class)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // indexes into class_roots_.
   // needs to be kept in sync with class_roots_descriptors_.
   enum ClassRoot {
@@ -664,14 +687,6 @@
   void SetClassRoot(ClassRoot class_root, mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ObjectArray<mirror::Class>* GetClassRoots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ObjectArray<mirror::Class>* class_roots =
-        ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Class>, kWithReadBarrier>(
-            &class_roots_);
-    DCHECK(class_roots != NULL);
-    return class_roots;
-  }
-
   static const char* class_roots_descriptors_[];
 
   const char* GetClassRootDescriptor(ClassRoot class_root) {
@@ -695,6 +710,8 @@
 
   InternTable* intern_table_;
 
+  // Trampolines within the image that bounce to runtime entrypoints. Done so that there is a
+  // single patch point within the image. TODO: make these proper relocations.
   const void* portable_resolution_trampoline_;
   const void* quick_resolution_trampoline_;
   const void* portable_imt_conflict_trampoline_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 04f6946..21fe006 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -22,7 +22,8 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
+#include "field_helper.h"
 #include "gc/heap.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -34,7 +35,10 @@
 #include "mirror/proxy.h"
 #include "mirror/reference.h"
 #include "mirror/stack_trace_element.h"
+#include "mirror/string-inl.h"
 #include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
@@ -572,37 +576,6 @@
   };
 };
 
-struct ClassClassOffsets : public CheckOffsets<mirror::ClassClass> {
-  ClassClassOffsets() : CheckOffsets<mirror::ClassClass>(true, "Ljava/lang/Class;") {
-    // alphabetical 64-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ClassClass, serialVersionUID_), "serialVersionUID"));
-  };
-};
-
-struct StringClassOffsets : public CheckOffsets<mirror::StringClass> {
-  StringClassOffsets() : CheckOffsets<mirror::StringClass>(true, "Ljava/lang/String;") {
-    // alphabetical references
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StringClass, ASCII_),                  "ASCII"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StringClass, CASE_INSENSITIVE_ORDER_), "CASE_INSENSITIVE_ORDER"));
-
-    // alphabetical 32-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StringClass, REPLACEMENT_CHAR_),       "REPLACEMENT_CHAR"));
-
-    // alphabetical 64-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StringClass, serialVersionUID_),       "serialVersionUID"));
-  };
-};
-
-struct ArtFieldClassOffsets : public CheckOffsets<mirror::ArtFieldClass> {
-  ArtFieldClassOffsets() : CheckOffsets<mirror::ArtFieldClass>(true, "Ljava/lang/reflect/ArtField;") {
-  };
-};
-
-struct ArtMethodClassOffsets : public CheckOffsets<mirror::ArtMethodClass> {
-  ArtMethodClassOffsets() : CheckOffsets<mirror::ArtMethodClass>(true, "Ljava/lang/reflect/ArtMethod;") {
-  };
-};
-
 struct DexCacheOffsets : public CheckOffsets<mirror::DexCache> {
   DexCacheOffsets() : CheckOffsets<mirror::DexCache>(false, "Ljava/lang/DexCache;") {
     // alphabetical references
@@ -652,11 +625,6 @@
   EXPECT_TRUE(DexCacheOffsets().Check());
   EXPECT_TRUE(ReferenceOffsets().Check());
   EXPECT_TRUE(FinalizerReferenceOffsets().Check());
-
-  EXPECT_TRUE(ClassClassOffsets().Check());
-  EXPECT_TRUE(StringClassOffsets().Check());
-  EXPECT_TRUE(ArtFieldClassOffsets().Check());
-  EXPECT_TRUE(ArtMethodClassOffsets().Check());
 }
 
 TEST_F(ClassLinkerTest, FindClassNonexistent) {
@@ -1091,4 +1059,28 @@
   }
 }
 
+TEST_F(ClassLinkerTest, ValidatePredefinedClassSizes) {
+  ScopedObjectAccess soa(Thread::Current());
+  NullHandle<mirror::ClassLoader> class_loader;
+  mirror::Class* c;
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Class;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::Class::ClassClassSize());
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Object;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::Object::ClassSize());
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/String;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::String::ClassSize());
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/DexCache;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize());
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/reflect/ArtField;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::ArtField::ClassSize());
+
+  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/reflect/ArtMethod;", class_loader);
+  EXPECT_EQ(c->GetClassSize(), mirror::ArtMethod::ClassSize());
+}
+
 }  // namespace art
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 0ed8b63..f47f13d 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -14,8 +14,31 @@
  * limitations under the License.
  */
 
+#include "common_runtime_test.h"
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <ScopedLocalRef.h>
+
+#include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker.h"
+#include "compiler_callbacks.h"
+#include "dex_file.h"
+#include "gc/heap.h"
 #include "gtest/gtest.h"
+#include "jni_internal.h"
+#include "mirror/class_loader.h"
+#include "noop_compiler_callbacks.h"
+#include "os.h"
+#include "runtime-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+#include "well_known_classes.h"
 
 int main(int argc, char **argv) {
   art::InitLogging(argv);
@@ -23,3 +46,293 @@
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
+
+namespace art {
+
+ScratchFile::ScratchFile() {
+  // ANDROID_DATA needs to be set
+  CHECK_NE(static_cast<char*>(nullptr), getenv("ANDROID_DATA")) <<
+      "Are you subclassing RuntimeTest?";
+  filename_ = getenv("ANDROID_DATA");
+  filename_ += "/TmpFile-XXXXXX";
+  int fd = mkstemp(&filename_[0]);
+  CHECK_NE(-1, fd);
+  file_.reset(new File(fd, GetFilename()));
+}
+
+ScratchFile::ScratchFile(const ScratchFile& other, const char* suffix) {
+  filename_ = other.GetFilename();
+  filename_ += suffix;
+  int fd = open(filename_.c_str(), O_RDWR | O_CREAT, 0666);
+  CHECK_NE(-1, fd);
+  file_.reset(new File(fd, GetFilename()));
+}
+
+ScratchFile::ScratchFile(File* file) {
+  CHECK(file != NULL);
+  filename_ = file->GetPath();
+  file_.reset(file);
+}
+
+ScratchFile::~ScratchFile() {
+  Unlink();
+}
+
+int ScratchFile::GetFd() const {
+  return file_->Fd();
+}
+
+void ScratchFile::Unlink() {
+  if (!OS::FileExists(filename_.c_str())) {
+    return;
+  }
+  int unlink_result = unlink(filename_.c_str());
+  CHECK_EQ(0, unlink_result);
+}
+
+CommonRuntimeTest::CommonRuntimeTest() {}
+CommonRuntimeTest::~CommonRuntimeTest() {}
+
+void CommonRuntimeTest::SetEnvironmentVariables(std::string& android_data) {
+  if (IsHost()) {
+    // $ANDROID_ROOT is set on the device, but not necessarily on the host.
+    // But it needs to be set so that icu4c can find its locale data.
+    const char* android_root_from_env = getenv("ANDROID_ROOT");
+    if (android_root_from_env == nullptr) {
+      // Use ANDROID_HOST_OUT for ANDROID_ROOT if it is set.
+      const char* android_host_out = getenv("ANDROID_HOST_OUT");
+      if (android_host_out != nullptr) {
+        setenv("ANDROID_ROOT", android_host_out, 1);
+      } else {
+        // Build it from ANDROID_BUILD_TOP or cwd
+        std::string root;
+        const char* android_build_top = getenv("ANDROID_BUILD_TOP");
+        if (android_build_top != nullptr) {
+          root += android_build_top;
+        } else {
+          // Not set by build server, so default to current directory
+          char* cwd = getcwd(nullptr, 0);
+          setenv("ANDROID_BUILD_TOP", cwd, 1);
+          root += cwd;
+          free(cwd);
+        }
+#if defined(__linux__)
+        root += "/out/host/linux-x86";
+#elif defined(__APPLE__)
+        root += "/out/host/darwin-x86";
+#else
+#error unsupported OS
+#endif
+        setenv("ANDROID_ROOT", root.c_str(), 1);
+      }
+    }
+    setenv("LD_LIBRARY_PATH", ":", 0);  // Required by java.lang.System.<clinit>.
+
+    // Not set by build server, so default
+    if (getenv("ANDROID_HOST_OUT") == nullptr) {
+      setenv("ANDROID_HOST_OUT", getenv("ANDROID_ROOT"), 1);
+    }
+  }
+
+  // On target, we cannot use /mnt/sdcard because it is mounted noexec, so use a subdir of dalvik-cache.
+  android_data = (IsHost() ? "/tmp/art-data-XXXXXX" : "/data/dalvik-cache/art-data-XXXXXX");
+  if (mkdtemp(&android_data[0]) == nullptr) {
+    PLOG(FATAL) << "mkdtemp(\"" << &android_data[0] << "\") failed";
+  }
+  setenv("ANDROID_DATA", android_data.c_str(), 1);
+}
+
+const DexFile* CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
+  std::vector<const DexFile*> dex_files;
+  std::string error_msg;
+  if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
+    LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
+    return nullptr;
+  } else {
+    CHECK_EQ(1U, dex_files.size()) << "Expected only one dex file in " << location;
+    return dex_files[0];
+  }
+}
+
+void CommonRuntimeTest::SetUp() {
+  SetEnvironmentVariables(android_data_);
+  dalvik_cache_.append(android_data_.c_str());
+  dalvik_cache_.append("/dalvik-cache");
+  int mkdir_result = mkdir(dalvik_cache_.c_str(), 0700);
+  ASSERT_EQ(mkdir_result, 0);
+
+  std::string error_msg;
+  java_lang_dex_file_ = LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str());
+  boot_class_path_.push_back(java_lang_dex_file_);
+
+  std::string min_heap_string(StringPrintf("-Xms%zdm", gc::Heap::kDefaultInitialSize / MB));
+  std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
+
+  callbacks_.reset(new NoopCompilerCallbacks());
+
+  RuntimeOptions options;
+  options.push_back(std::make_pair("bootclasspath", &boot_class_path_));
+  options.push_back(std::make_pair("-Xcheck:jni", nullptr));
+  options.push_back(std::make_pair(min_heap_string.c_str(), nullptr));
+  options.push_back(std::make_pair(max_heap_string.c_str(), nullptr));
+  options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+  SetUpRuntimeOptions(&options);
+  if (!Runtime::Create(options, false)) {
+    LOG(FATAL) << "Failed to create runtime";
+    return;
+  }
+  runtime_.reset(Runtime::Current());
+  class_linker_ = runtime_->GetClassLinker();
+  class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
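+  // Run the <clinit> of the hand-crafted class roots so tests start from fully initialized
+  // boot classes.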
+  class_linker_->RunRootClinits();
+
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we call
+  // Runtime::Start; give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+  // We're back in native, take the opportunity to initialize well known classes.
+  WellKnownClasses::Init(Thread::Current()->GetJniEnv());
+
+  // Create the heap thread pool so that the GC runs in parallel for tests. Normally, the thread
+  // pool is created by the runtime.
+  runtime_->GetHeap()->CreateThreadPool();
+  runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
+}
+
+void CommonRuntimeTest::TearDown() {
+  const char* android_data = getenv("ANDROID_DATA");
+  ASSERT_TRUE(android_data != nullptr);
+  DIR* dir = opendir(dalvik_cache_.c_str());
+  ASSERT_TRUE(dir != nullptr);
+  dirent* e;
+  while ((e = readdir(dir)) != nullptr) {
+    if ((strcmp(e->d_name, ".") == 0) || (strcmp(e->d_name, "..") == 0)) {
+      continue;
+    }
+    std::string filename(dalvik_cache_);
+    filename.push_back('/');
+    filename.append(e->d_name);
+    int unlink_result = unlink(filename.c_str());
+    ASSERT_EQ(0, unlink_result);
+  }
+  closedir(dir);
+  int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
+  ASSERT_EQ(0, rmdir_cache_result);
+  int rmdir_data_result = rmdir(android_data_.c_str());
+  ASSERT_EQ(0, rmdir_data_result);
+
+  // icu4c has a fixed 10-element array "gCommonICUDataArray".
+  // If we run > 10 tests, we fill that array and u_setCommonData fails.
+  // There's a function to clear the array, but it's not public...
+  typedef void (*IcuCleanupFn)();
+  void* sym = dlsym(RTLD_DEFAULT, "u_cleanup_" U_ICU_VERSION_SHORT);
+  CHECK(sym != nullptr) << dlerror();
+  IcuCleanupFn icu_cleanup_fn = reinterpret_cast<IcuCleanupFn>(sym);
+  (*icu_cleanup_fn)();
+
+  STLDeleteElements(&opened_dex_files_);
+
+  Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
+}
+
+std::string CommonRuntimeTest::GetLibCoreDexFileName() {
+  return GetDexFileName("core-libart");
+}
+
+std::string CommonRuntimeTest::GetDexFileName(const std::string& jar_prefix) {
+  if (IsHost()) {
+    const char* host_dir = getenv("ANDROID_HOST_OUT");
+    CHECK(host_dir != nullptr);
+    return StringPrintf("%s/framework/%s-hostdex.jar", host_dir, jar_prefix.c_str());
+  }
+  return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str());
+}
+
+std::string CommonRuntimeTest::GetTestAndroidRoot() {
+  if (IsHost()) {
+    const char* host_dir = getenv("ANDROID_HOST_OUT");
+    CHECK(host_dir != nullptr);
+    return host_dir;
+  }
+  return GetAndroidRoot();
+}
+
+std::vector<const DexFile*> CommonRuntimeTest::OpenTestDexFiles(const char* name) {
+  CHECK(name != nullptr);
+  std::string filename;
+  if (IsHost()) {
+    filename += getenv("ANDROID_HOST_OUT");
+    filename += "/framework/";
+  } else {
+    filename += "/data/nativetest/art/";
+  }
+  filename += "art-gtest-";
+  filename += name;
+  filename += ".jar";
+  std::string error_msg;
+  std::vector<const DexFile*> dex_files;
+  bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
+  CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
+  for (const DexFile* dex_file : dex_files) {
+    CHECK_EQ(PROT_READ, dex_file->GetPermissions());
+    CHECK(dex_file->IsReadOnly());
+  }
+  opened_dex_files_.insert(opened_dex_files_.end(), dex_files.begin(), dex_files.end());
+  return dex_files;
+}
+
+const DexFile* CommonRuntimeTest::OpenTestDexFile(const char* name) {
+  std::vector<const DexFile*> vector = OpenTestDexFiles(name);
+  EXPECT_EQ(1U, vector.size());
+  return vector[0];
+}
+
+jobject CommonRuntimeTest::LoadDex(const char* dex_name) {
+  std::vector<const DexFile*> dex_files = OpenTestDexFiles(dex_name);
+  CHECK_NE(0U, dex_files.size());
+  for (const DexFile* dex_file : dex_files) {
+    class_linker_->RegisterDexFile(*dex_file);
+  }
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  ScopedLocalRef<jobject> class_loader_local(soa.Env(),
+      soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
+  jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
+  soa.Self()->SetClassLoaderOverride(soa.Decode<mirror::ClassLoader*>(class_loader_local.get()));
+  Runtime::Current()->SetCompileTimeClassPath(class_loader, dex_files);
+  return class_loader;
+}
+
+CheckJniAbortCatcher::CheckJniAbortCatcher() : vm_(Runtime::Current()->GetJavaVM()) {
+  vm_->check_jni_abort_hook = Hook;
+  vm_->check_jni_abort_hook_data = &actual_;
+}
+
+CheckJniAbortCatcher::~CheckJniAbortCatcher() {
+  vm_->check_jni_abort_hook = nullptr;
+  vm_->check_jni_abort_hook_data = nullptr;
+  EXPECT_TRUE(actual_.empty()) << actual_;
+}
+
+void CheckJniAbortCatcher::Check(const char* expected_text) {
+  EXPECT_TRUE(actual_.find(expected_text) != std::string::npos) << "\n"
+      << "Expected to find: " << expected_text << "\n"
+      << "In the output   : " << actual_;
+  actual_.clear();
+}
+
+void CheckJniAbortCatcher::Hook(void* data, const std::string& reason) {
+  // We use += because when we're hooking the aborts like this, multiple problems can be found.
+  *reinterpret_cast<std::string*>(data) += reason;
+}
+
+}  // namespace art
+
+namespace std {
+
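+// Defined in namespace std so that argument-dependent lookup finds this overload when gtest
+// prints std::vector values in assertion messages.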
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs) {
+  os << ::art::ToString(rhs);
+  return os;
+}
+
+}  // namespace std
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 289dc1d..d045031 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -17,73 +17,33 @@
 #ifndef ART_RUNTIME_COMMON_RUNTIME_TEST_H_
 #define ART_RUNTIME_COMMON_RUNTIME_TEST_H_
 
-#include <dirent.h>
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fstream>
-#include <memory>
+#include <gtest/gtest.h>
+#include <jni.h>
 
-#include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
-#include "base/macros.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "entrypoints/entrypoint_utils.h"
-#include "gc/heap.h"
-#include "gtest/gtest.h"
-#include "instruction_set.h"
-#include "interpreter/interpreter.h"
-#include "mirror/class_loader.h"
-#include "noop_compiler_callbacks.h"
-#include "oat_file.h"
-#include "object_utils.h"
+#include <string>
+
+#include "base/mutex.h"
+#include "globals.h"
 #include "os.h"
-#include "runtime.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
-#include "thread.h"
-#include "utils.h"
-#include "verifier/method_verifier.h"
-#include "verifier/method_verifier-inl.h"
-#include "well_known_classes.h"
 
 namespace art {
 
+class ClassLinker;
+class CompilerCallbacks;
+class DexFile;
+class JavaVMExt;
+class Runtime;
+typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
+
 class ScratchFile {
  public:
-  ScratchFile() {
-    // ANDROID_DATA needs to be set
-    CHECK_NE(static_cast<char*>(nullptr), getenv("ANDROID_DATA")) <<
-        "Are you subclassing RuntimeTest?";
-    filename_ = getenv("ANDROID_DATA");
-    filename_ += "/TmpFile-XXXXXX";
-    int fd = mkstemp(&filename_[0]);
-    CHECK_NE(-1, fd);
-    file_.reset(new File(fd, GetFilename()));
-  }
+  ScratchFile();
 
-  ScratchFile(const ScratchFile& other, const char* suffix) {
-    filename_ = other.GetFilename();
-    filename_ += suffix;
-    int fd = open(filename_.c_str(), O_RDWR | O_CREAT, 0666);
-    CHECK_NE(-1, fd);
-    file_.reset(new File(fd, GetFilename()));
-  }
+  ScratchFile(const ScratchFile& other, const char* suffix);
 
-  explicit ScratchFile(File* file) {
-    CHECK(file != NULL);
-    filename_ = file->GetPath();
-    file_.reset(file);
-  }
+  explicit ScratchFile(File* file);
 
-  ~ScratchFile() {
-    Unlink();
-  }
+  ~ScratchFile();
 
   const std::string& GetFilename() const {
     return filename_;
@@ -93,17 +53,9 @@
     return file_.get();
   }
 
-  int GetFd() const {
-    return file_->Fd();
-  }
+  int GetFd() const;
 
-  void Unlink() {
-    if (!OS::FileExists(filename_.c_str())) {
-      return;
-    }
-    int unlink_result = unlink(filename_.c_str());
-    CHECK_EQ(0, unlink_result);
-  }
+  void Unlink();
 
  private:
   std::string filename_;
@@ -112,221 +64,37 @@
 
 class CommonRuntimeTest : public testing::Test {
  public:
-  static void SetEnvironmentVariables(std::string& android_data) {
-    if (IsHost()) {
-      // $ANDROID_ROOT is set on the device, but not necessarily on the host.
-      // But it needs to be set so that icu4c can find its locale data.
-      const char* android_root_from_env = getenv("ANDROID_ROOT");
-      if (android_root_from_env == nullptr) {
-        // Use ANDROID_HOST_OUT for ANDROID_ROOT if it is set.
-        const char* android_host_out = getenv("ANDROID_HOST_OUT");
-        if (android_host_out != nullptr) {
-          setenv("ANDROID_ROOT", android_host_out, 1);
-        } else {
-          // Build it from ANDROID_BUILD_TOP or cwd
-          std::string root;
-          const char* android_build_top = getenv("ANDROID_BUILD_TOP");
-          if (android_build_top != nullptr) {
-            root += android_build_top;
-          } else {
-            // Not set by build server, so default to current directory
-            char* cwd = getcwd(nullptr, 0);
-            setenv("ANDROID_BUILD_TOP", cwd, 1);
-            root += cwd;
-            free(cwd);
-          }
-#if defined(__linux__)
-          root += "/out/host/linux-x86";
-#elif defined(__APPLE__)
-          root += "/out/host/darwin-x86";
-#else
-#error unsupported OS
-#endif
-          setenv("ANDROID_ROOT", root.c_str(), 1);
-        }
-      }
-      setenv("LD_LIBRARY_PATH", ":", 0);  // Required by java.lang.System.<clinit>.
+  static void SetEnvironmentVariables(std::string& android_data);
 
-      // Not set by build server, so default
-      if (getenv("ANDROID_HOST_OUT") == nullptr) {
-        setenv("ANDROID_HOST_OUT", getenv("ANDROID_ROOT"), 1);
-      }
-    }
-
-    // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache
-    android_data = (IsHost() ? "/tmp/art-data-XXXXXX" : "/data/dalvik-cache/art-data-XXXXXX");
-    if (mkdtemp(&android_data[0]) == nullptr) {
-      PLOG(FATAL) << "mkdtemp(\"" << &android_data[0] << "\") failed";
-    }
-    setenv("ANDROID_DATA", android_data.c_str(), 1);
-  }
+  CommonRuntimeTest();
+  ~CommonRuntimeTest();
 
  protected:
   static bool IsHost() {
     return !kIsTargetBuild;
   }
 
-  const DexFile* LoadExpectSingleDexFile(const char* location) {
-    std::vector<const DexFile*> dex_files;
-    std::string error_msg;
-    if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
-      LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
-      return nullptr;
-    } else {
-      CHECK_EQ(1U, dex_files.size()) << "Expected only one dex file in " << location;
-      return dex_files[0];
-    }
-  }
+  const DexFile* LoadExpectSingleDexFile(const char* location);
 
-  virtual void SetUp() {
-    SetEnvironmentVariables(android_data_);
-    dalvik_cache_.append(android_data_.c_str());
-    dalvik_cache_.append("/dalvik-cache");
-    int mkdir_result = mkdir(dalvik_cache_.c_str(), 0700);
-    ASSERT_EQ(mkdir_result, 0);
-
-    std::string error_msg;
-    java_lang_dex_file_ = LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str());
-    boot_class_path_.push_back(java_lang_dex_file_);
-
-    std::string min_heap_string(StringPrintf("-Xms%zdm", gc::Heap::kDefaultInitialSize / MB));
-    std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
-
-    Runtime::Options options;
-    options.push_back(std::make_pair("bootclasspath", &boot_class_path_));
-    options.push_back(std::make_pair("-Xcheck:jni", nullptr));
-    options.push_back(std::make_pair(min_heap_string.c_str(), nullptr));
-    options.push_back(std::make_pair(max_heap_string.c_str(), nullptr));
-    options.push_back(std::make_pair("compilercallbacks", &callbacks_));
-    SetUpRuntimeOptions(&options);
-    if (!Runtime::Create(options, false)) {
-      LOG(FATAL) << "Failed to create runtime";
-      return;
-    }
-    runtime_.reset(Runtime::Current());
-    class_linker_ = runtime_->GetClassLinker();
-    class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
-
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now and then switch to a more managable ScopedObjectAccess.
-    Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-
-    // We're back in native, take the opportunity to initialize well known classes.
-    WellKnownClasses::Init(Thread::Current()->GetJniEnv());
-
-    // Create the heap thread pool so that the GC runs in parallel for tests. Normally, the thread
-    // pool is created by the runtime.
-    runtime_->GetHeap()->CreateThreadPool();
-    runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
-  }
+  virtual void SetUp();
 
   // Allow subclasses such as CommonCompilerTest to add extra options.
-  virtual void SetUpRuntimeOptions(Runtime::Options *options) {}
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options) {}
 
-  virtual void TearDown() {
-    const char* android_data = getenv("ANDROID_DATA");
-    ASSERT_TRUE(android_data != nullptr);
-    DIR* dir = opendir(dalvik_cache_.c_str());
-    ASSERT_TRUE(dir != nullptr);
-    dirent* e;
-    while ((e = readdir(dir)) != nullptr) {
-      if ((strcmp(e->d_name, ".") == 0) || (strcmp(e->d_name, "..") == 0)) {
-        continue;
-      }
-      std::string filename(dalvik_cache_);
-      filename.push_back('/');
-      filename.append(e->d_name);
-      int unlink_result = unlink(filename.c_str());
-      ASSERT_EQ(0, unlink_result);
-    }
-    closedir(dir);
-    int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
-    ASSERT_EQ(0, rmdir_cache_result);
-    int rmdir_data_result = rmdir(android_data_.c_str());
-    ASSERT_EQ(0, rmdir_data_result);
+  virtual void TearDown();
 
-    // icu4c has a fixed 10-element array "gCommonICUDataArray".
-    // If we run > 10 tests, we fill that array and u_setCommonData fails.
-    // There's a function to clear the array, but it's not public...
-    typedef void (*IcuCleanupFn)();
-    void* sym = dlsym(RTLD_DEFAULT, "u_cleanup_" U_ICU_VERSION_SHORT);
-    CHECK(sym != nullptr) << dlerror();
-    IcuCleanupFn icu_cleanup_fn = reinterpret_cast<IcuCleanupFn>(sym);
-    (*icu_cleanup_fn)();
+  std::string GetLibCoreDexFileName();
 
-    STLDeleteElements(&opened_dex_files_);
+  std::string GetDexFileName(const std::string& jar_prefix);
 
-    Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
-  }
-
-  std::string GetLibCoreDexFileName() {
-    return GetDexFileName("core-libart");
-  }
-
-  std::string GetDexFileName(const std::string& jar_prefix) {
-    if (IsHost()) {
-      const char* host_dir = getenv("ANDROID_HOST_OUT");
-      CHECK(host_dir != nullptr);
-      return StringPrintf("%s/framework/%s-hostdex.jar", host_dir, jar_prefix.c_str());
-    }
-    return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str());
-  }
-
-  std::string GetTestAndroidRoot() {
-    if (IsHost()) {
-      const char* host_dir = getenv("ANDROID_HOST_OUT");
-      CHECK(host_dir != nullptr);
-      return host_dir;
-    }
-    return GetAndroidRoot();
-  }
+  std::string GetTestAndroidRoot();
 
   std::vector<const DexFile*> OpenTestDexFiles(const char* name)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(name != nullptr);
-    std::string filename;
-    if (IsHost()) {
-      filename += getenv("ANDROID_HOST_OUT");
-      filename += "/framework/";
-    } else {
-      filename += "/data/nativetest/art/";
-    }
-    filename += "art-gtest-";
-    filename += name;
-    filename += ".jar";
-    std::string error_msg;
-    std::vector<const DexFile*> dex_files;
-    bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
-    CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
-    for (const DexFile* dex_file : dex_files) {
-      CHECK_EQ(PROT_READ, dex_file->GetPermissions());
-      CHECK(dex_file->IsReadOnly());
-    }
-    opened_dex_files_.insert(opened_dex_files_.end(), dex_files.begin(), dex_files.end());
-    return dex_files;
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const DexFile* OpenTestDexFile(const char* name)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::vector<const DexFile*> vector = OpenTestDexFiles(name);
-    EXPECT_EQ(1U, vector.size());
-    return vector[0];
-  }
+  const DexFile* OpenTestDexFile(const char* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  jobject LoadDex(const char* dex_name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::vector<const DexFile*> dex_files = OpenTestDexFiles(dex_name);
-    CHECK_NE(0U, dex_files.size());
-    for (const DexFile* dex_file : dex_files) {
-      class_linker_->RegisterDexFile(*dex_file);
-    }
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    ScopedLocalRef<jobject> class_loader_local(soa.Env(),
-        soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
-    jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
-    soa.Self()->SetClassLoaderOverride(soa.Decode<mirror::ClassLoader*>(class_loader_local.get()));
-    Runtime::Current()->SetCompileTimeClassPath(class_loader, dex_files);
-    return class_loader;
-  }
+  jobject LoadDex(const char* dex_name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   std::string android_data_;
   std::string dalvik_cache_;
@@ -337,7 +105,7 @@
   ClassLinker* class_linker_;
 
  private:
-  NoopCompilerCallbacks callbacks_;
+  std::unique_ptr<CompilerCallbacks> callbacks_;
   std::vector<const DexFile*> opened_dex_files_;
 };
 
@@ -345,29 +113,14 @@
 // rather than aborting, so be careful!
 class CheckJniAbortCatcher {
  public:
-  CheckJniAbortCatcher() : vm_(Runtime::Current()->GetJavaVM()) {
-    vm_->check_jni_abort_hook = Hook;
-    vm_->check_jni_abort_hook_data = &actual_;
-  }
+  CheckJniAbortCatcher();
 
-  ~CheckJniAbortCatcher() {
-    vm_->check_jni_abort_hook = nullptr;
-    vm_->check_jni_abort_hook_data = nullptr;
-    EXPECT_TRUE(actual_.empty()) << actual_;
-  }
+  ~CheckJniAbortCatcher();
 
-  void Check(const char* expected_text) {
-    EXPECT_TRUE(actual_.find(expected_text) != std::string::npos) << "\n"
-        << "Expected to find: " << expected_text << "\n"
-        << "In the output   : " << actual_;
-    actual_.clear();
-  }
+  void Check(const char* expected_text);
 
  private:
-  static void Hook(void* data, const std::string& reason) {
-    // We use += because when we're hooking the aborts like this, multiple problems can be found.
-    *reinterpret_cast<std::string*>(data) += reason;
-  }
+  static void Hook(void* data, const std::string& reason);
 
   JavaVMExt* vm_;
   std::string actual_;
@@ -398,10 +151,7 @@
 
 // TODO: isn't gtest supposed to be able to print STL types for itself?
 template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs) {
-  os << ::art::ToString(rhs);
-  return os;
-}
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs);
 
 }  // namespace std
 
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 8de3068..970593d 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -16,6 +16,8 @@
 
 #include "common_throws.h"
 
+#include <sstream>
+
 #include "base/logging.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -25,12 +27,9 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "thread.h"
 #include "verifier/method_verifier.h"
 
-#include <sstream>
-
 namespace art {
 
 static void AddReferrerLocation(std::ostream& os, mirror::Class* referrer)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c95be01..4cf4c09 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -25,11 +25,13 @@
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
+#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
 #include "jdwp/object_registry.h"
+#include "method_helper.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
@@ -39,7 +41,6 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "mirror/throwable.h"
-#include "object_utils.h"
 #include "quick/inline_method_analyser.h"
 #include "reflection.h"
 #include "safe_map.h"
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index c1e00fc..284aa89 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -18,7 +18,12 @@
 
 #include <memory>
 
+#include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
+#include "os.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 93faeae..d475d42 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -16,11 +16,15 @@
 
 #include "dex_file_verifier.h"
 
-#include <memory>
+#include "sys/mman.h"
 #include "zlib.h"
+#include <memory>
 
-#include "common_runtime_test.h"
+#include "base/unix_file/fd_file.h"
 #include "base/macros.h"
+#include "common_runtime_test.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index 0d00cc3..b8f180b 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -16,7 +16,10 @@
 
 #include "dex_method_iterator.h"
 
+#include "base/stl_util.h"
 #include "common_runtime_test.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
new file mode 100644
index 0000000..90c8fcf
--- /dev/null
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -0,0 +1,663 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_INL_H_
+#define ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_INL_H_
+
+#include "entrypoint_utils.h"
+
+#include "class_linker-inl.h"
+#include "common_throws.h"
+#include "dex_file.h"
+#include "indirect_reference_table.h"
+#include "invoke_type.h"
+#include "jni_internal.h"
+#include "mirror/art_method.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/throwable.h"
+#include "handle_scope-inl.h"
+#include "thread.h"
+
+namespace art {
+
+// TODO: Fix no thread safety analysis when GCC can handle template specialization.
+template <const bool kAccessCheck>
+static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+                                              mirror::ArtMethod* method,
+                                              Thread* self, bool* slow_path) {
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  if (UNLIKELY(klass == nullptr)) {
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    *slow_path = true;
+    if (klass == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+  }
+  if (kAccessCheck) {
+    if (UNLIKELY(!klass->IsInstantiable())) {
+      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
+                              PrettyDescriptor(klass).c_str());
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
+  }
+  if (UNLIKELY(!klass->IsInitialized())) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_klass(hs.NewHandle(klass));
+    // EnsureInitialized (the class initializer) might cause a GC and
+    // may cause us to suspend, meaning that another thread may try to
+    // change the allocator while we are stuck in the entrypoints of
+    // an old allocator. Also, the class initialization may fail. To
+    // handle these cases we mark the slow path boolean as true so
+    // that the caller knows to check the allocator type to see if it
+    // has changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    return h_klass.Get();
+  }
+  return klass;
+}
+
+// TODO: Fix no thread safety analysis when annotalysis is smarter.
+static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                                 Thread* self,
+                                                                 bool* slow_path) {
+  if (UNLIKELY(!klass->IsInitialized())) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(klass));
+    // EnsureInitialized (the class initializer) might cause a GC and
+    // may cause us to suspend, meaning that another thread may try to
+    // change the allocator while we are stuck in the entrypoints of
+    // an old allocator. Also, the class initialization may fail. To
+    // handle these cases we mark the slow path boolean as true so
+    // that the caller knows to check the allocator type to see if it
+    // has changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    return h_class.Get();
+  }
+  return klass;
+}
+
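+// Illustrative note: this is the already-resolved variant of CheckObjectAlloc above; the
+// type is known, so only initialization (and a possible allocator change on the slow path)
+// is re-checked.
+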
+// Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
+// cannot be resolved, throw an error. If it can, use it to create an instance.
+// When verification/compiler hasn't been able to verify access, optionally perform an access
+// check.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kAccessCheck, bool kInstrumented>
+static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+                                                  mirror::ArtMethod* method,
+                                                  Thread* self,
+                                                  gc::AllocatorType allocator_type) {
+  bool slow_path = false;
+  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    return klass->Alloc<kInstrumented>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  }
+  DCHECK(klass != nullptr);
+  return klass->Alloc<kInstrumented>(self, allocator_type);
+}
+
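+// Illustrative only: a quick-code allocation entrypoint would typically use the template as
+//   mirror::Object* obj =
+//       AllocObjectFromCode<true, true>(type_idx, method, self, allocator_type);
+// with kAccessCheck/kInstrumented chosen to match how the calling code was compiled; the
+// real wiring lives in the allocation entrypoints.
+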
+// Given the context of a calling Method and a resolved class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
+                                                          mirror::ArtMethod* method,
+                                                          Thread* self,
+                                                          gc::AllocatorType allocator_type) {
+  DCHECK(klass != nullptr);
+  bool slow_path = false;
+  klass = CheckClassInitializedForObjectAlloc(klass, self, &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    // Pass in false since the object cannot be finalizable.
+    return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
+  }
+  // Pass in false since the object cannot be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
+}
+
+// Given the context of a calling Method and an initialized class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
+                                                             mirror::ArtMethod* method,
+                                                             Thread* self,
+                                                             gc::AllocatorType allocator_type) {
+  DCHECK(klass != nullptr);
+  // Pass in false since the object cannot be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
+}
+
+
+// TODO: Fix no thread safety analysis when GCC can handle template specialization.
+template <bool kAccessCheck>
+static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+                                             mirror::ArtMethod* method,
+                                             int32_t component_count,
+                                             bool* slow_path) {
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    *slow_path = true;
+    return nullptr;  // Failure
+  }
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    *slow_path = true;
+    if (klass == nullptr) {  // Error
+      DCHECK(Thread::Current()->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  }
+  if (kAccessCheck) {
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
+  }
+  return klass;
+}
+
+// Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
+// it cannot be resolved, throw an error. If it can, use it to create an array.
+// When verification/compiler hasn't been able to verify access, optionally perform an access
+// check.
+// TODO: Fix no thread safety analysis when GCC can handle template specialization.
+template <bool kAccessCheck, bool kInstrumented>
+static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+                                                mirror::ArtMethod* method,
+                                                int32_t component_count,
+                                                Thread* self,
+                                                gc::AllocatorType allocator_type) {
+  bool slow_path = false;
+  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count,
+                                                       &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                               klass->GetComponentSize(),
+                                               heap->GetCurrentAllocator());
+  }
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                             klass->GetComponentSize(), allocator_type);
+}
+
+template <bool kAccessCheck, bool kInstrumented>
+static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
+                                                        mirror::ArtMethod* method,
+                                                        int32_t component_count,
+                                                        Thread* self,
+                                                        gc::AllocatorType allocator_type) {
+  DCHECK(klass != nullptr);
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    return nullptr;  // Failure
+  }
+  if (kAccessCheck) {
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      return nullptr;  // Failure
+    }
+  }
+  // No need to retry a slow-path allocation as the above code won't cause a GC or thread
+  // suspension.
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                             klass->GetComponentSize(), allocator_type);
+}
+
+template<FindFieldType type, bool access_check>
+static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                                  Thread* self, size_t expected_size) {
+  bool is_primitive;
+  bool is_set;
+  bool is_static;
+  switch (type) {
+    case InstanceObjectRead:     is_primitive = false; is_set = false; is_static = false; break;
+    case InstanceObjectWrite:    is_primitive = false; is_set = true;  is_static = false; break;
+    case InstancePrimitiveRead:  is_primitive = true;  is_set = false; is_static = false; break;
+    case InstancePrimitiveWrite: is_primitive = true;  is_set = true;  is_static = false; break;
+    case StaticObjectRead:       is_primitive = false; is_set = false; is_static = true;  break;
+    case StaticObjectWrite:      is_primitive = false; is_set = true;  is_static = true;  break;
+    case StaticPrimitiveRead:    is_primitive = true;  is_set = false; is_static = true;  break;
+    case StaticPrimitiveWrite:   // Keep GCC happy by having a default handler, fall-through.
+    default:                     is_primitive = true;  is_set = true;  is_static = true;  break;
+  }
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+  if (UNLIKELY(resolved_field == nullptr)) {
+    DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+    return nullptr;  // Failure.
+  }
+  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
+  if (access_check) {
+    if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
+      ThrowIncompatibleClassChangeErrorField(resolved_field, is_static, referrer);
+      return nullptr;
+    }
+    mirror::Class* referring_class = referrer->GetDeclaringClass();
+    if (UNLIKELY(!referring_class->CheckResolvedFieldAccess(fields_class, resolved_field,
+                                                            field_idx))) {
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+      return nullptr;  // Failure.
+    }
+    if (UNLIKELY(is_set && resolved_field->IsFinal() && (fields_class != referring_class))) {
+      ThrowIllegalAccessErrorFinalField(referrer, resolved_field);
+      return nullptr;  // Failure.
+    } else {
+      if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+                   resolved_field->FieldSize() != expected_size)) {
+        ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+        DCHECK(throw_location.GetMethod() == referrer);
+        self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
+                                 "Attempted read of %zd-bit %s on field '%s'",
+                                 expected_size * (32 / sizeof(int32_t)),
+                                 is_primitive ? "primitive" : "non-primitive",
+                                 PrettyField(resolved_field, true).c_str());
+        return nullptr;  // Failure.
+      }
+    }
+  }
+  if (!is_static) {
+    // Instance fields are accessed on an instance, so their class must already be initialized.
+    return resolved_field;
+  } else {
+    // If the class is initialized we're done.
+    if (LIKELY(fields_class->IsInitialized())) {
+      return resolved_field;
+    } else {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(fields_class));
+      if (LIKELY(class_linker->EnsureInitialized(h_class, true, true))) {
+        // Otherwise let's ensure the class is initialized before resolving the field.
+        return resolved_field;
+      }
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind
+      return nullptr;  // Failure.
+    }
+  }
+}
+
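+// Illustrative only: a 32-bit instance field getter compiled with access checks would
+// resolve its field roughly as
+//   mirror::ArtField* f = FindFieldFromCode<InstancePrimitiveRead, true>(
+//       field_idx, referrer, self, sizeof(int32_t));
+// where a null result means an exception is pending and the caller must unwind.
+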
+// Explicit template instantiations of FindFieldFromCode for all field access types.
+#define EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, _access_check) \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE \
+mirror::ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
+                                                          mirror::ArtMethod* referrer, \
+                                                          Thread* self, size_t expected_size) \
+
+#define EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
+    EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, false); \
+    EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, true)
+
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstanceObjectRead);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstanceObjectWrite);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstancePrimitiveRead);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstancePrimitiveWrite);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticObjectRead);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticObjectWrite);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticPrimitiveRead);
+EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticPrimitiveWrite);
+
+#undef EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL
+#undef EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL
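+
+// The explicit instantiations above make the compiler emit FindFieldFromCode for every
+// (FindFieldType, access_check) combination, so translation units that only see the
+// declaration in entrypoint_utils.h still find these definitions at link time.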
+
+template<InvokeType type, bool access_check>
+static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
+                                                    mirror::Object** this_object,
+                                                    mirror::ArtMethod** referrer, Thread* self) {
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, *referrer, type);
+  if (resolved_method == nullptr) {
+    StackHandleScope<1> hs(self);
+    mirror::Object* null_this = nullptr;
+    HandleWrapper<mirror::Object> h_this(
+        hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
+    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+  }
+  if (UNLIKELY(resolved_method == nullptr)) {
+    DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+    return nullptr;  // Failure.
+  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
+    // Maintain interpreter-like semantics where NullPointerException is thrown
+    // after potential NoSuchMethodError from class linker.
+    ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+    DCHECK_EQ(*referrer, throw_location.GetMethod());
+    ThrowNullPointerExceptionForMethodAccess(throw_location, method_idx, type);
+    return nullptr;  // Failure.
+  } else if (access_check) {
+    // Incompatible class change should have been handled in resolve method.
+    if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) {
+      ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method,
+                                        *referrer);
+      return nullptr;  // Failure.
+    }
+    mirror::Class* methods_class = resolved_method->GetDeclaringClass();
+    mirror::Class* referring_class = (*referrer)->GetDeclaringClass();
+    bool can_access_resolved_method =
+        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
+                                                         method_idx);
+    if (UNLIKELY(!can_access_resolved_method)) {
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+      return nullptr;  // Failure.
+    }
+  }
+  switch (type) {
+    case kStatic:
+    case kDirect:
+      return resolved_method;
+    case kVirtual: {
+      mirror::ObjectArray<mirror::ArtMethod>* vtable = (*this_object)->GetClass()->GetVTable();
+      uint16_t vtable_index = resolved_method->GetMethodIndex();
+      if (access_check &&
+          (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
+        // Behavior to agree with that of the verifier.
+        ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                               resolved_method->GetName(), resolved_method->GetSignature());
+        return nullptr;  // Failure.
+      }
+      DCHECK(vtable != nullptr);
+      return vtable->GetWithoutChecks(vtable_index);
+    }
+    case kSuper: {
+      mirror::Class* super_class = (*referrer)->GetDeclaringClass()->GetSuperClass();
+      uint16_t vtable_index = resolved_method->GetMethodIndex();
+      mirror::ObjectArray<mirror::ArtMethod>* vtable;
+      if (access_check) {
+        // Check existence of super class.
+        vtable = (super_class != nullptr) ? super_class->GetVTable() : nullptr;
+        if (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength())) {
+          // Behavior to agree with that of the verifier.
+          ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
+                                 resolved_method->GetName(), resolved_method->GetSignature());
+          return nullptr;  // Failure.
+        }
+      } else {
+        // Super class must exist.
+        DCHECK(super_class != nullptr);
+        vtable = super_class->GetVTable();
+      }
+      DCHECK(vtable != nullptr);
+      return vtable->GetWithoutChecks(vtable_index);
+    }
+    case kInterface: {
+      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+      mirror::ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(imt_index);
+      if (!imt_method->IsImtConflictMethod()) {
+        return imt_method;
+      } else {
+        mirror::ArtMethod* interface_method =
+            (*this_object)->GetClass()->FindVirtualMethodForInterface(resolved_method);
+        if (UNLIKELY(interface_method == nullptr)) {
+          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
+                                                                     *this_object, *referrer);
+          return nullptr;  // Failure.
+        }
+        return interface_method;
+      }
+    }
+    default:
+      LOG(FATAL) << "Unknown invoke type " << type;
+      return nullptr;  // Failure.
+  }
+}
+
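+// Illustrative only: an invoke-virtual stub compiled with checks would call
+//   FindMethodFromCode<kVirtual, true>(method_idx, &this_object, &referrer, self);
+// and, as with field lookup, a null result means an exception is pending.
+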
+// Explicit template instantiations of FindMethodFromCode for all invoke types.
+#define EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, _access_check)                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
+  mirror::ArtMethod* FindMethodFromCode<_type, _access_check>(uint32_t method_idx,         \
+                                                              mirror::Object** this_object, \
+                                                              mirror::ArtMethod** referrer, \
+                                                              Thread* self)
+#define EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
+    EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, false);   \
+    EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, true)
+
+EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kStatic);
+EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kDirect);
+EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kVirtual);
+EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kSuper);
+EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kInterface);
+
+#undef EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL
+#undef EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL
+
+// Fast path field resolution that can't initialize classes or throw exceptions.
+static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
+                                              mirror::ArtMethod* referrer,
+                                              FindFieldType type, size_t expected_size) {
+  mirror::ArtField* resolved_field =
+      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
+  if (UNLIKELY(resolved_field == nullptr)) {
+    return nullptr;
+  }
+  // Check for incompatible class change.
+  bool is_primitive;
+  bool is_set;
+  bool is_static;
+  switch (type) {
+    case InstanceObjectRead:     is_primitive = false; is_set = false; is_static = false; break;
+    case InstanceObjectWrite:    is_primitive = false; is_set = true;  is_static = false; break;
+    case InstancePrimitiveRead:  is_primitive = true;  is_set = false; is_static = false; break;
+    case InstancePrimitiveWrite: is_primitive = true;  is_set = true;  is_static = false; break;
+    case StaticObjectRead:       is_primitive = false; is_set = false; is_static = true;  break;
+    case StaticObjectWrite:      is_primitive = false; is_set = true;  is_static = true;  break;
+    case StaticPrimitiveRead:    is_primitive = true;  is_set = false; is_static = true;  break;
+    case StaticPrimitiveWrite:   is_primitive = true;  is_set = true;  is_static = true;  break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";  // Assignment below to avoid GCC warnings.
+      is_primitive = true;
+      is_set = true;
+      is_static = true;
+      break;
+  }
+  if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
+    // Incompatible class change.
+    return nullptr;
+  }
+  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
+  if (is_static) {
+    // Check that the class is initialized; otherwise fail so that the slow path can contend
+    // to initialize the class with other threads that may be racing to do this.
+    if (UNLIKELY(!fields_class->IsInitialized())) {
+      return nullptr;
+    }
+  }
+  mirror::Class* referring_class = referrer->GetDeclaringClass();
+  if (UNLIKELY(!referring_class->CanAccess(fields_class) ||
+               !referring_class->CanAccessMember(fields_class,
+                                                 resolved_field->GetAccessFlags()) ||
+               (is_set && resolved_field->IsFinal() && (fields_class != referring_class)))) {
+    // Illegal access.
+    return nullptr;
+  }
+  if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+               resolved_field->FieldSize() != expected_size)) {
+    return nullptr;
+  }
+  return resolved_field;
+}
+
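+// Illustrative only: callers treat a null result as "take the slow path", roughly
+//   mirror::ArtField* f = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, 4);
+//   if (f == nullptr) {
+//     f = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, 4);
+//   }
+// since the fast path never initializes classes or throws.
+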
+// Fast path method resolution that can't throw exceptions.
+static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
+                                                mirror::Object* this_object,
+                                                mirror::ArtMethod* referrer,
+                                                bool access_check, InvokeType type) {
+  bool is_direct = type == kStatic || type == kDirect;
+  if (UNLIKELY(this_object == nullptr && !is_direct)) {
+    return nullptr;
+  }
+  mirror::ArtMethod* resolved_method =
+      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedMethod(method_idx);
+  if (UNLIKELY(resolved_method == nullptr)) {
+    return nullptr;
+  }
+  if (access_check) {
+    // Check for incompatible class change errors and access.
+    bool icce = resolved_method->CheckIncompatibleClassChange(type);
+    if (UNLIKELY(icce)) {
+      return nullptr;
+    }
+    mirror::Class* methods_class = resolved_method->GetDeclaringClass();
+    mirror::Class* referring_class = referrer->GetDeclaringClass();
+    if (UNLIKELY(!referring_class->CanAccess(methods_class) ||
+                 !referring_class->CanAccessMember(methods_class,
+                                                   resolved_method->GetAccessFlags()))) {
+      // Potential illegal access, may need to refine the method's class.
+      return nullptr;
+    }
+  }
+  if (type == kInterface) {  // Most common form of slow path dispatch.
+    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method);
+  } else if (is_direct) {
+    return resolved_method;
+  } else if (type == kSuper) {
+    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTable()->
+        Get(resolved_method->GetMethodIndex());
+  } else {
+    DCHECK(type == kVirtual);
+    return this_object->GetClass()->GetVTable()->Get(resolved_method->GetMethodIndex());
+  }
+}
+
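+// As with FindFieldFast, a null result here is not an error but a signal that the caller
+// must fall back to the throwing, initializing FindMethodFromCode path.
+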
+static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
+                                                    mirror::ArtMethod* referrer,
+                                                    Thread* self, bool can_run_clinit,
+                                                    bool verify_access) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
+  if (UNLIKELY(klass == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return nullptr;  // Failure - Indicate to caller to deliver exception
+  }
+  // Perform access check if necessary.
+  mirror::Class* referring_class = referrer->GetDeclaringClass();
+  if (verify_access && UNLIKELY(!referring_class->CanAccess(klass))) {
+    ThrowIllegalAccessErrorClass(referring_class, klass);
+    return nullptr;  // Failure - Indicate to caller to deliver exception
+  }
+  // If we're just implementing const-class, we shouldn't call <clinit>.
+  if (!can_run_clinit) {
+    return klass;
+  }
+  // If we are the <clinit> of this class, just return our storage.
+  //
+  // Do not set the DexCache InitializedStaticStorage, since that implies <clinit> has finished
+  // running.
+  if (klass == referring_class && referrer->IsConstructor() && referrer->IsStatic()) {
+    return klass;
+  }
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> h_class(hs.NewHandle(klass));
+  if (!class_linker->EnsureInitialized(h_class, true, true)) {
+    CHECK(self->IsExceptionPending());
+    return nullptr;  // Failure - Indicate to caller to deliver exception
+  }
+  return h_class.Get();
+}
+
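+// Illustrative only: a const-class style caller passes can_run_clinit == false (per the
+// comment above, loading a java.lang.Class value must not run <clinit>), while callers
+// that need the class usable for allocation or static access pass true and accept
+// possible suspension.
+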
+static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
+                                                    uint32_t string_idx) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  return class_linker->ResolveString(string_idx, referrer);
+}
+
+static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) {
+  // Save any pending exception over monitor exit call.
+  mirror::Throwable* saved_exception = NULL;
+  ThrowLocation saved_throw_location;
+  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
+  if (UNLIKELY(self->IsExceptionPending())) {
+    saved_exception = self->GetException(&saved_throw_location);
+    self->ClearException();
+  }
+  // Decode locked object and unlock, before popping local references.
+  self->DecodeJObject(locked)->MonitorExit(self);
+  if (UNLIKELY(self->IsExceptionPending())) {
+    LOG(FATAL) << "Synchronized JNI code returning with an exception:\n"
+        << saved_exception->Dump()
+        << "\nEncountered second exception during implicit MonitorExit:\n"
+        << self->GetException(NULL)->Dump();
+  }
+  // Restore pending exception.
+  if (saved_exception != NULL) {
+    self->SetException(saved_throw_location, saved_exception);
+    self->SetExceptionReportedToInstrumentation(is_exception_reported);
+  }
+}
+
+static inline void CheckSuspend(Thread* thread) {
+  for (;;) {
+    if (thread->ReadFlag(kCheckpointRequest)) {
+      thread->RunCheckpointFunction();
+    } else if (thread->ReadFlag(kSuspendRequest)) {
+      thread->FullSuspendCheck();
+    } else {
+      break;
+    }
+  }
+}
+
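+// Note: CheckSuspend loops rather than testing each flag once because servicing one
+// request can race with a new one (e.g. a suspend can be requested while a checkpoint
+// runs), so the flags are re-read until both are clear.
+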
+template <typename INT_TYPE, typename FLOAT_TYPE>
+static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f) {
+  const INT_TYPE kMaxInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::max());
+  const INT_TYPE kMinInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::min());
+  const FLOAT_TYPE kMaxIntAsFloat = static_cast<FLOAT_TYPE>(kMaxInt);
+  const FLOAT_TYPE kMinIntAsFloat = static_cast<FLOAT_TYPE>(kMinInt);
+  if (LIKELY(f > kMinIntAsFloat)) {
+    if (LIKELY(f < kMaxIntAsFloat)) {
+      return static_cast<INT_TYPE>(f);
+    } else {
+      return kMaxInt;
+    }
+  } else {
+    return (f != f) ? 0 : kMinInt;  // f != f implies NaN
+  }
+}
+
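+// Illustrative only: with INT_TYPE = int32_t and FLOAT_TYPE = float this matches the
+// Java f2i saturating semantics, e.g.
+//   art_float_to_integral<int32_t, float>(NAN)    == 0
+//   art_float_to_integral<int32_t, float>(1e10f)  == INT32_MAX
+//   art_float_to_integral<int32_t, float>(-1e10f) == INT32_MIN
+//   art_float_to_integral<int32_t, float>(1.9f)   == 1
+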
+}  // namespace art
+
+#endif  // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_INL_H_
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index a0e32f5..0fa0e41 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -16,16 +16,16 @@
 
 #include "entrypoints/entrypoint_utils.h"
 
+#include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/proxy.h"
 #include "reflection.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -139,6 +139,25 @@
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
+void CheckReferenceResult(mirror::Object* o, Thread* self) {
+  if (o == NULL) {
+    return;
+  }
+  mirror::ArtMethod* m = self->GetCurrentMethod(NULL);
+  if (o == kInvalidIndirectRefObject) {
+    JniAbortF(NULL, "invalid reference returned from %s", PrettyMethod(m).c_str());
+  }
+  // Make sure that the result is an instance of the type this method was expected to return.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
+  mirror::Class* return_type = MethodHelper(h_m).GetReturnType();
+
+  if (!o->InstanceOf(return_type)) {
+    JniAbortF(NULL, "attempt to return an instance of %s from %s", PrettyTypeOf(o).c_str(),
+              PrettyMethod(h_m.Get()).c_str());
+  }
+}
+
 JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_method_jobj,
                                     std::vector<jvalue>& args) {
@@ -219,8 +238,7 @@
     mirror::Throwable* exception = soa.Self()->GetException(NULL);
     if (exception->IsCheckedException()) {
       mirror::Object* rcvr = soa.Decode<mirror::Object*>(rcvr_jobj);
-      mirror::SynthesizedProxyClass* proxy_class =
-          down_cast<mirror::SynthesizedProxyClass*>(rcvr->GetClass());
+      mirror::Class* proxy_class = rcvr->GetClass();
       mirror::ArtMethod* interface_method =
           soa.Decode<mirror::ArtMethod*>(interface_method_jobj);
       mirror::ArtMethod* proxy_method =
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index ff836a4..c5d67aa 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -17,105 +17,40 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
 #define ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
 
+#include <jni.h>
+#include <stdint.h>
+
 #include "base/macros.h"
-#include "class_linker-inl.h"
-#include "common_throws.h"
-#include "dex_file.h"
-#include "indirect_reference_table.h"
+#include "base/mutex.h"
+#include "gc/allocator_type.h"
 #include "invoke_type.h"
-#include "jni_internal.h"
-#include "mirror/art_method.h"
-#include "mirror/array.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/throwable.h"
-#include "object_utils.h"
-#include "handle_scope-inl.h"
-#include "thread.h"
+#include "jvalue.h"
 
 namespace art {
 
 namespace mirror {
   class Class;
+  class Array;
   class ArtField;
+  class ArtMethod;
   class Object;
+  class String;
 }  // namespace mirror
 
+class ScopedObjectAccessAlreadyRunnable;
+class Thread;
+
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <const bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                                             mirror::ArtMethod* method,
                                                             Thread* self, bool* slow_path)
-    NO_THREAD_SAFETY_ANALYSIS {
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  if (UNLIKELY(klass == NULL)) {
-    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    *slow_path = true;
-    if (klass == NULL) {
-      DCHECK(self->IsExceptionPending());
-      return nullptr;  // Failure
-    }
-  }
-  if (kAccessCheck) {
-    if (UNLIKELY(!klass->IsInstantiable())) {
-      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
-                              PrettyDescriptor(klass).c_str());
-      *slow_path = true;
-      return nullptr;  // Failure
-    }
-    mirror::Class* referrer = method->GetDeclaringClass();
-    if (UNLIKELY(!referrer->CanAccess(klass))) {
-      ThrowIllegalAccessErrorClass(referrer, klass);
-      *slow_path = true;
-      return nullptr;  // Failure
-    }
-  }
-  if (UNLIKELY(!klass->IsInitialized())) {
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Class> h_klass(hs.NewHandle(klass));
-    // EnsureInitialized (the class initializer) might cause a GC.
-    // may cause us to suspend meaning that another thread may try to
-    // change the allocator while we are stuck in the entrypoints of
-    // an old allocator. Also, the class initialization may fail. To
-    // handle these cases we mark the slow path boolean as true so
-    // that the caller knows to check the allocator type to see if it
-    // has changed and to null-check the return value in case the
-    // initialization fails.
-    *slow_path = true;
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true)) {
-      DCHECK(self->IsExceptionPending());
-      return nullptr;  // Failure
-    }
-    return h_klass.Get();
-  }
-  return klass;
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // TODO: Fix no thread safety analysis when annotalysis is smarter.
 ALWAYS_INLINE static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
                                                                                Thread* self, bool* slow_path)
-    NO_THREAD_SAFETY_ANALYSIS {
-  if (UNLIKELY(!klass->IsInitialized())) {
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Class> h_class(hs.NewHandle(klass));
-    // EnsureInitialized (the class initializer) might cause a GC.
-    // may cause us to suspend meaning that another thread may try to
-    // change the allocator while we are stuck in the entrypoints of
-    // an old allocator. Also, the class initialization may fail. To
-    // handle these cases we mark the slow path boolean as true so
-    // that the caller knows to check the allocator type to see if it
-    // has changed and to null-check the return value in case the
-    // initialization fails.
-    *slow_path = true;
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
-      DCHECK(self->IsExceptionPending());
-      return nullptr;  // Failure
-    }
-    return h_class.Get();
-  }
-  return klass;
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
@@ -127,18 +62,7 @@
                                                                 mirror::ArtMethod* method,
                                                                 Thread* self,
                                                                 gc::AllocatorType allocator_type)
-    NO_THREAD_SAFETY_ANALYSIS {
-  bool slow_path = false;
-  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path);
-  if (UNLIKELY(slow_path)) {
-    if (klass == nullptr) {
-      return nullptr;
-    }
-    return klass->Alloc<kInstrumented>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
-  }
-  DCHECK(klass != nullptr);
-  return klass->Alloc<kInstrumented>(self, allocator_type);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and a resolved class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
@@ -147,21 +71,7 @@
                                                                         mirror::ArtMethod* method,
                                                                         Thread* self,
                                                                         gc::AllocatorType allocator_type)
-    NO_THREAD_SAFETY_ANALYSIS {
-  DCHECK(klass != nullptr);
-  bool slow_path = false;
-  klass = CheckClassInitializedForObjectAlloc(klass, self, &slow_path);
-  if (UNLIKELY(slow_path)) {
-    if (klass == nullptr) {
-      return nullptr;
-    }
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
-    return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
-  }
-  // Pass in false since the object can not be finalizable.
-  return klass->Alloc<kInstrumented, false>(self, allocator_type);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and an initialized class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
@@ -170,11 +80,7 @@
                                                                            mirror::ArtMethod* method,
                                                                            Thread* self,
                                                                            gc::AllocatorType allocator_type)
-    NO_THREAD_SAFETY_ANALYSIS {
-  DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
-  return klass->Alloc<kInstrumented, false>(self, allocator_type);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
@@ -183,32 +89,7 @@
                                                            mirror::ArtMethod* method,
                                                            int32_t component_count,
                                                            bool* slow_path)
-    NO_THREAD_SAFETY_ANALYSIS {
-  if (UNLIKELY(component_count < 0)) {
-    ThrowNegativeArraySizeException(component_count);
-    *slow_path = true;
-    return nullptr;  // Failure
-  }
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
-    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    *slow_path = true;
-    if (klass == nullptr) {  // Error
-      DCHECK(Thread::Current()->IsExceptionPending());
-      return nullptr;  // Failure
-    }
-    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
-  }
-  if (kAccessCheck) {
-    mirror::Class* referrer = method->GetDeclaringClass();
-    if (UNLIKELY(!referrer->CanAccess(klass))) {
-      ThrowIllegalAccessErrorClass(referrer, klass);
-      *slow_path = true;
-      return nullptr;  // Failure
-    }
-  }
-  return klass;
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
 // it cannot be resolved, throw an error. If it can, use it to create an array.
@@ -221,22 +102,7 @@
                                                               int32_t component_count,
                                                               Thread* self,
                                                               gc::AllocatorType allocator_type)
-    NO_THREAD_SAFETY_ANALYSIS {
-  bool slow_path = false;
-  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count,
-                                                       &slow_path);
-  if (UNLIKELY(slow_path)) {
-    if (klass == nullptr) {
-      return nullptr;
-    }
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                               klass->GetComponentSize(),
-                                               heap->GetCurrentAllocator());
-  }
-  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                             klass->GetComponentSize(), allocator_type);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
@@ -244,24 +110,7 @@
                                                                       int32_t component_count,
                                                                       Thread* self,
                                                                       gc::AllocatorType allocator_type)
-    NO_THREAD_SAFETY_ANALYSIS {
-  DCHECK(klass != nullptr);
-  if (UNLIKELY(component_count < 0)) {
-    ThrowNegativeArraySizeException(component_count);
-    return nullptr;  // Failure
-  }
-  if (kAccessCheck) {
-    mirror::Class* referrer = method->GetDeclaringClass();
-    if (UNLIKELY(!referrer->CanAccess(klass))) {
-      ThrowIllegalAccessErrorClass(referrer, klass);
-      return nullptr;  // Failure
-    }
-  }
-  // No need to retry a slow-path allocation as the above code won't cause a GC or thread
-  // suspension.
-  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
-                                             klass->GetComponentSize(), allocator_type);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
                                                  int32_t component_count, Thread* self,
@@ -290,422 +139,48 @@
 
 template<FindFieldType type, bool access_check>
 static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                                  Thread* self, size_t expected_size) {
-  bool is_primitive;
-  bool is_set;
-  bool is_static;
-  switch (type) {
-    case InstanceObjectRead:     is_primitive = false; is_set = false; is_static = false; break;
-    case InstanceObjectWrite:    is_primitive = false; is_set = true;  is_static = false; break;
-    case InstancePrimitiveRead:  is_primitive = true;  is_set = false; is_static = false; break;
-    case InstancePrimitiveWrite: is_primitive = true;  is_set = true;  is_static = false; break;
-    case StaticObjectRead:       is_primitive = false; is_set = false; is_static = true;  break;
-    case StaticObjectWrite:      is_primitive = false; is_set = true;  is_static = true;  break;
-    case StaticPrimitiveRead:    is_primitive = true;  is_set = false; is_static = true;  break;
-    case StaticPrimitiveWrite:   // Keep GCC happy by having a default handler, fall-through.
-    default:                     is_primitive = true;  is_set = true;  is_static = true;  break;
-  }
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
-  if (UNLIKELY(resolved_field == nullptr)) {
-    DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
-    return nullptr;  // Failure.
-  }
-  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-  if (access_check) {
-    if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
-      ThrowIncompatibleClassChangeErrorField(resolved_field, is_static, referrer);
-      return nullptr;
-    }
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (UNLIKELY(!referring_class->CheckResolvedFieldAccess(fields_class, resolved_field,
-                                                            field_idx))) {
-      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
-      return nullptr;  // Failure.
-    }
-    if (UNLIKELY(is_set && resolved_field->IsFinal() && (fields_class != referring_class))) {
-      ThrowIllegalAccessErrorFinalField(referrer, resolved_field);
-      return nullptr;  // Failure.
-    } else {
-      if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
-                   resolved_field->FieldSize() != expected_size)) {
-        ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-        DCHECK(throw_location.GetMethod() == referrer);
-        self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
-                                 "Attempted read of %zd-bit %s on field '%s'",
-                                 expected_size * (32 / sizeof(int32_t)),
-                                 is_primitive ? "primitive" : "non-primitive",
-                                 PrettyField(resolved_field, true).c_str());
-        return nullptr;  // Failure.
-      }
-    }
-  }
-  if (!is_static) {
-    // instance fields must be being accessed on an initialized class
-    return resolved_field;
-  } else {
-    // If the class is initialized we're done.
-    if (LIKELY(fields_class->IsInitialized())) {
-      return resolved_field;
-    } else {
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> h_class(hs.NewHandle(fields_class));
-      if (LIKELY(class_linker->EnsureInitialized(h_class, true, true))) {
-        // Otherwise let's ensure the class is initialized before resolving the field.
-        return resolved_field;
-      }
-      DCHECK(self->IsExceptionPending());  // Throw exception and unwind
-      return nullptr;  // Failure.
-    }
-  }
-}
-
-// Explicit template declarations of FindFieldFromCode for all field access types.
-#define EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, _access_check) \
-template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE \
-mirror::ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
-                                                          mirror::ArtMethod* referrer, \
-                                                          Thread* self, size_t expected_size) \
-
-#define EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
-    EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, false); \
-    EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, true)
-
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstanceObjectRead);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstanceObjectWrite);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstancePrimitiveRead);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(InstancePrimitiveWrite);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticObjectRead);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticObjectWrite);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticPrimitiveRead);
-EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(StaticPrimitiveWrite);
-
-#undef EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL
-#undef EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL
+                                                  Thread* self, size_t expected_size)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 template<InvokeType type, bool access_check>
 static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
                                                     mirror::Object** this_object,
-                                                    mirror::ArtMethod** referrer, Thread* self) {
-  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, *referrer, type);
-  if (resolved_method == nullptr) {
-    StackHandleScope<1> hs(self);
-    mirror::Object* null_this = nullptr;
-    HandleWrapper<mirror::Object> h_this(
-        hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
-    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
-  }
-  if (UNLIKELY(resolved_method == nullptr)) {
-    DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
-    return nullptr;  // Failure.
-  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-    DCHECK_EQ(*referrer, throw_location.GetMethod());
-    ThrowNullPointerExceptionForMethodAccess(throw_location, method_idx, type);
-    return nullptr;  // Failure.
-  } else if (access_check) {
-    // Incompatible class change should have been handled in resolve method.
-    if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) {
-      ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method,
-                                        *referrer);
-      return nullptr;  // Failure.
-    }
-    mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = (*referrer)->GetDeclaringClass();
-    bool can_access_resolved_method =
-        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
-                                                         method_idx);
-    if (UNLIKELY(!can_access_resolved_method)) {
-      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
-      return nullptr;  // Failure.
-    }
-  }
-  switch (type) {
-    case kStatic:
-    case kDirect:
-      return resolved_method;
-    case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = (*this_object)->GetClass()->GetVTable();
-      uint16_t vtable_index = resolved_method->GetMethodIndex();
-      if (access_check &&
-          (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
-        // Behavior to agree with that of the verifier.
-        ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
-                               resolved_method->GetName(), resolved_method->GetSignature());
-        return nullptr;  // Failure.
-      }
-      DCHECK(vtable != nullptr);
-      return vtable->GetWithoutChecks(vtable_index);
-    }
-    case kSuper: {
-      mirror::Class* super_class = (*referrer)->GetDeclaringClass()->GetSuperClass();
-      uint16_t vtable_index = resolved_method->GetMethodIndex();
-      mirror::ObjectArray<mirror::ArtMethod>* vtable;
-      if (access_check) {
-        // Check existence of super class.
-        vtable = (super_class != nullptr) ? super_class->GetVTable() : nullptr;
-        if (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength())) {
-          // Behavior to agree with that of the verifier.
-          ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
-                                 resolved_method->GetName(), resolved_method->GetSignature());
-          return nullptr;  // Failure.
-        }
-      } else {
-        // Super class must exist.
-        DCHECK(super_class != nullptr);
-        vtable = super_class->GetVTable();
-      }
-      DCHECK(vtable != nullptr);
-      return vtable->GetWithoutChecks(vtable_index);
-    }
-    case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % ClassLinker::kImtSize;
-      mirror::ObjectArray<mirror::ArtMethod>* imt_table = (*this_object)->GetClass()->GetImTable();
-      mirror::ArtMethod* imt_method = imt_table->Get(imt_index);
-      if (!imt_method->IsImtConflictMethod()) {
-        return imt_method;
-      } else {
-        mirror::ArtMethod* interface_method =
-            (*this_object)->GetClass()->FindVirtualMethodForInterface(resolved_method);
-        if (UNLIKELY(interface_method == nullptr)) {
-          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
-                                                                     *this_object, *referrer);
-          return nullptr;  // Failure.
-        }
-        return interface_method;
-      }
-    }
-    default:
-      LOG(FATAL) << "Unknown invoke type " << type;
-      return nullptr;  // Failure.
-  }
-}
-
-// Explicit template declarations of FindMethodFromCode for all invoke types.
-#define EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, _access_check)                 \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
-  mirror::ArtMethod* FindMethodFromCode<_type, _access_check>(uint32_t method_idx,         \
-                                                              mirror::Object** this_object, \
-                                                              mirror::ArtMethod** referrer, \
-                                                              Thread* self)
-#define EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
-    EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, false);   \
-    EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, true)
-
-EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kStatic);
-EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kDirect);
-EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kVirtual);
-EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kSuper);
-EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(kInterface);
-
-#undef EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL
-#undef EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL
+                                                    mirror::ArtMethod** referrer, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
 static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
                                               mirror::ArtMethod* referrer,
                                               FindFieldType type, size_t expected_size)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
-  if (UNLIKELY(resolved_field == nullptr)) {
-    return nullptr;
-  }
-  // Check for incompatible class change.
-  bool is_primitive;
-  bool is_set;
-  bool is_static;
-  switch (type) {
-    case InstanceObjectRead:     is_primitive = false; is_set = false; is_static = false; break;
-    case InstanceObjectWrite:    is_primitive = false; is_set = true;  is_static = false; break;
-    case InstancePrimitiveRead:  is_primitive = true;  is_set = false; is_static = false; break;
-    case InstancePrimitiveWrite: is_primitive = true;  is_set = true;  is_static = false; break;
-    case StaticObjectRead:       is_primitive = false; is_set = false; is_static = true;  break;
-    case StaticObjectWrite:      is_primitive = false; is_set = true;  is_static = true;  break;
-    case StaticPrimitiveRead:    is_primitive = true;  is_set = false; is_static = true;  break;
-    case StaticPrimitiveWrite:   is_primitive = true;  is_set = true;  is_static = true;  break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";  // Assignment below to avoid GCC warnings.
-      is_primitive = true;
-      is_set = true;
-      is_static = true;
-      break;
-  }
-  if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
-    // Incompatible class change.
-    return nullptr;
-  }
-  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-  if (is_static) {
-    // Check class is initialized else fail so that we can contend to initialize the class with
-    // other threads that may be racing to do this.
-    if (UNLIKELY(!fields_class->IsInitialized())) {
-      return nullptr;
-    }
-  }
-  mirror::Class* referring_class = referrer->GetDeclaringClass();
-  if (UNLIKELY(!referring_class->CanAccess(fields_class) ||
-               !referring_class->CanAccessMember(fields_class,
-                                                 resolved_field->GetAccessFlags()) ||
-               (is_set && resolved_field->IsFinal() && (fields_class != referring_class)))) {
-    // Illegal access.
-    return nullptr;
-  }
-  if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
-               resolved_field->FieldSize() != expected_size)) {
-    return nullptr;
-  }
-  return resolved_field;
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Fast path method resolution that can't throw exceptions.
 static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
                                                 mirror::Object* this_object,
                                                 mirror::ArtMethod* referrer,
                                                 bool access_check, InvokeType type)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  bool is_direct = type == kStatic || type == kDirect;
-  if (UNLIKELY(this_object == NULL && !is_direct)) {
-    return NULL;
-  }
-  mirror::ArtMethod* resolved_method =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedMethod(method_idx);
-  if (UNLIKELY(resolved_method == NULL)) {
-    return NULL;
-  }
-  if (access_check) {
-    // Check for incompatible class change errors and access.
-    bool icce = resolved_method->CheckIncompatibleClassChange(type);
-    if (UNLIKELY(icce)) {
-      return NULL;
-    }
-    mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (UNLIKELY(!referring_class->CanAccess(methods_class) ||
-                 !referring_class->CanAccessMember(methods_class,
-                                                   resolved_method->GetAccessFlags()))) {
-      // Potential illegal access, may need to refine the method's class.
-      return NULL;
-    }
-  }
-  if (type == kInterface) {  // Most common form of slow path dispatch.
-    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method);
-  } else if (is_direct) {
-    return resolved_method;
-  } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTable()->
-        Get(resolved_method->GetMethodIndex());
-  } else {
-    DCHECK(type == kVirtual);
-    return this_object->GetClass()->GetVTable()->Get(resolved_method->GetMethodIndex());
-  }
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
                                                     mirror::ArtMethod* referrer,
                                                     Thread* self, bool can_run_clinit,
                                                     bool verify_access)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
-  if (UNLIKELY(klass == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return nullptr;  // Failure - Indicate to caller to deliver exception
-  }
-  // Perform access check if necessary.
-  mirror::Class* referring_class = referrer->GetDeclaringClass();
-  if (verify_access && UNLIKELY(!referring_class->CanAccess(klass))) {
-    ThrowIllegalAccessErrorClass(referring_class, klass);
-    return nullptr;  // Failure - Indicate to caller to deliver exception
-  }
-  // If we're just implementing const-class, we shouldn't call <clinit>.
-  if (!can_run_clinit) {
-    return klass;
-  }
-  // If we are the <clinit> of this class, just return our storage.
-  //
-  // Do not set the DexCache InitializedStaticStorage, since that implies <clinit> has finished
-  // running.
-  if (klass == referring_class && referrer->IsConstructor() && referrer->IsStatic()) {
-    return klass;
-  }
-  StackHandleScope<1> hs(self);
-  Handle<mirror::Class> h_class(hs.NewHandle(klass));
-  if (!class_linker->EnsureInitialized(h_class, true, true)) {
-    CHECK(self->IsExceptionPending());
-    return nullptr;  // Failure - Indicate to caller to deliver exception
-  }
-  return h_class.Get();
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
                                                     uint32_t string_idx)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  return class_linker->ResolveString(string_idx, referrer);
-}
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+// TODO: annotalysis disabled as monitor semantics are maintained in Java code.
 static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
-    NO_THREAD_SAFETY_ANALYSIS /* SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) */ {
-  // Save any pending exception over monitor exit call.
-  mirror::Throwable* saved_exception = NULL;
-  ThrowLocation saved_throw_location;
-  bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
-  if (UNLIKELY(self->IsExceptionPending())) {
-    saved_exception = self->GetException(&saved_throw_location);
-    self->ClearException();
-  }
-  // Decode locked object and unlock, before popping local references.
-  self->DecodeJObject(locked)->MonitorExit(self);
-  if (UNLIKELY(self->IsExceptionPending())) {
-    LOG(FATAL) << "Synchronized JNI code returning with an exception:\n"
-        << saved_exception->Dump()
-        << "\nEncountered second exception during implicit MonitorExit:\n"
-        << self->GetException(NULL)->Dump();
-  }
-  // Restore pending exception.
-  if (saved_exception != NULL) {
-    self->SetException(saved_throw_location, saved_exception);
-    self->SetExceptionReportedToInstrumentation(is_exception_reported);
-  }
-}
+    NO_THREAD_SAFETY_ANALYSIS;
 
-static inline void CheckReferenceResult(mirror::Object* o, Thread* self)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (o == NULL) {
-    return;
-  }
-  mirror::ArtMethod* m = self->GetCurrentMethod(NULL);
-  if (o == kInvalidIndirectRefObject) {
-    JniAbortF(NULL, "invalid reference returned from %s", PrettyMethod(m).c_str());
-  }
-  // Make sure that the result is an instance of the type this method was expected to return.
-  StackHandleScope<1> hs(self);
-  Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
-  mirror::Class* return_type = MethodHelper(h_m).GetReturnType();
+void CheckReferenceResult(mirror::Object* o, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  if (!o->InstanceOf(return_type)) {
-    JniAbortF(NULL, "attempt to return an instance of %s from %s", PrettyTypeOf(o).c_str(),
-              PrettyMethod(h_m.Get()).c_str());
-  }
-}
-
-static inline void CheckSuspend(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  for (;;) {
-    if (thread->ReadFlag(kCheckpointRequest)) {
-      thread->RunCheckpointFunction();
-    } else if (thread->ReadFlag(kSuspendRequest)) {
-      thread->FullSuspendCheck();
-    } else {
-      break;
-    }
-  }
-}
+static inline void CheckSuspend(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_art_method_jobj,
@@ -750,26 +225,6 @@
   return GetQuickToInterpreterBridge();
 }
 
-static inline const void* GetPortableResolutionTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetPortableResolutionTrampoline();
-}
-
-static inline const void* GetQuickResolutionTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetQuickResolutionTrampoline();
-}
-
-static inline const void* GetPortableImtConflictTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetPortableImtConflictTrampoline();
-}
-
-static inline const void* GetQuickImtConflictTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetQuickImtConflictTrampoline();
-}
-
-static inline const void* GetQuickToInterpreterBridgeTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetQuickToInterpreterBridgeTrampoline();
-}
-
 extern "C" void art_portable_proxy_invoke_handler();
 static inline const void* GetPortableProxyInvokeHandler() {
   return reinterpret_cast<void*>(art_portable_proxy_invoke_handler);
@@ -786,21 +241,7 @@
 }
 
 template <typename INT_TYPE, typename FLOAT_TYPE>
-static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f) {
-  const INT_TYPE kMaxInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::max());
-  const INT_TYPE kMinInt = static_cast<INT_TYPE>(std::numeric_limits<INT_TYPE>::min());
-  const FLOAT_TYPE kMaxIntAsFloat = static_cast<FLOAT_TYPE>(kMaxInt);
-  const FLOAT_TYPE kMinIntAsFloat = static_cast<FLOAT_TYPE>(kMinInt);
-  if (LIKELY(f > kMinIntAsFloat)) {
-     if (LIKELY(f < kMaxIntAsFloat)) {
-       return static_cast<INT_TYPE>(f);
-     } else {
-       return kMaxInt;
-     }
-  } else {
-    return (f != f) ? 0 : kMinInt;  // f != f implies NaN
-  }
-}
+static inline INT_TYPE art_float_to_integral(FLOAT_TYPE f);
 
 }  // namespace art
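
The hunks above strip the inline bodies out of entrypoint_utils.h, leaving only
annotated declarations; the definitions move to entrypoint_utils-inl.h, which the
.cc files below now include instead. A minimal sketch of that header/-inl.h split,
using a hypothetical Frobnicate() helper rather than any real ART function:

  // frobnicate.h -- declaration only, cheap for every translation unit to include.
  template <bool kCheck>
  static inline int Frobnicate(int value);

  // frobnicate-inl.h -- the definition, included only by the .cc files that call
  // Frobnicate() and therefore need the body visible for inlining.
  #include "frobnicate.h"

  template <bool kCheck>
  static inline int Frobnicate(int value) {
    if (kCheck && value < 0) {
      return 0;  // Clamp negative inputs when checking is enabled.
    }
    return value * 2;
  }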
 
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 329c175..64faf76 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -18,7 +18,6 @@
 #include "interpreter/interpreter.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "reflection.h"
 #include "runtime.h"
 #include "stack.h"
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index bae4023..edb3b72 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -18,7 +18,6 @@
 #include "entrypoints/entrypoint_utils.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 
diff --git a/runtime/entrypoints/math_entrypoints.cc b/runtime/entrypoints/math_entrypoints.cc
index b839b63..b0eaf1e 100644
--- a/runtime/entrypoints/math_entrypoints.cc
+++ b/runtime/entrypoints/math_entrypoints.cc
@@ -16,7 +16,7 @@
 
 #include "math_entrypoints.h"
 
-#include "entrypoint_utils.h"
+#include "entrypoint_utils-inl.h"
 
 namespace art {
 
diff --git a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
index 4c05e75..de95f7d 100644
--- a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 
diff --git a/runtime/entrypoints/portable/portable_cast_entrypoints.cc b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
index a553a22..151b178 100644
--- a/runtime/entrypoints/portable/portable_cast_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
@@ -15,7 +15,7 @@
  */
 
 #include "common_throws.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/object-inl.h"
 
 namespace art {
diff --git a/runtime/entrypoints/portable/portable_dexcache_entrypoints.cc b/runtime/entrypoints/portable/portable_dexcache_entrypoints.cc
index b37ebcf..9364c46 100644
--- a/runtime/entrypoints/portable/portable_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_dexcache_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/portable/portable_field_entrypoints.cc b/runtime/entrypoints/portable/portable_field_entrypoints.cc
index f48f1a9..371aca4 100644
--- a/runtime/entrypoints/portable/portable_field_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_field_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
index 335a617..686954b 100644
--- a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
@@ -15,7 +15,7 @@
  */
 
 #include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index eb50ec3..6f9c083 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/portable/portable_jni_entrypoints.cc b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
index 3e7b30a..0d0f21b 100644
--- a/runtime/entrypoints/portable/portable_jni_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 #include "thread-inl.h"
diff --git a/runtime/entrypoints/portable/portable_lock_entrypoints.cc b/runtime/entrypoints/portable/portable_lock_entrypoints.cc
index 358ac23..fcd3e9d 100644
--- a/runtime/entrypoints/portable/portable_lock_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_lock_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/object-inl.h"
 
 namespace art {
diff --git a/runtime/entrypoints/portable/portable_thread_entrypoints.cc b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
index 9e62e0e..23e1c36 100644
--- a/runtime/entrypoints/portable/portable_thread_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/object-inl.h"
 #include "verifier/dex_gc_map.h"
diff --git a/runtime/entrypoints/portable/portable_throw_entrypoints.cc b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
index 189e6b5..be6231c 100644
--- a/runtime/entrypoints/portable/portable_throw_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
@@ -15,7 +15,7 @@
  */
 
 #include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 
@@ -80,7 +80,6 @@
   }
   mirror::Class* exception_type = exception->GetClass();
   StackHandleScope<1> hs(self);
-  MethodHelper mh(hs.NewHandle(current_method));
   const DexFile::CodeItem* code_item = current_method->GetCodeItem();
   DCHECK_LT(ti_offset, code_item->tries_size_);
   const DexFile::TryItem* try_item = DexFile::GetTryItems(*code_item, ti_offset);
@@ -98,7 +97,8 @@
       break;
     }
     // Does this catch exception type apply?
-    mirror::Class* iter_exception_type = mh.GetDexCacheResolvedType(iter_type_idx);
+    mirror::Class* iter_exception_type =
+        current_method->GetDexCacheResolvedTypes()->Get(iter_type_idx);
     if (UNLIKELY(iter_exception_type == NULL)) {
       // TODO: check, the verifier (class linker?) should take care of resolving all exception
       //       classes early.
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 2da016f..9f75b0f 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -18,11 +18,10 @@
 #define ART_RUNTIME_ENTRYPOINTS_PORTABLE_PORTABLE_ARGUMENT_VISITOR_H_
 
 #include "dex_instruction-inl.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "interpreter/interpreter.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -431,7 +430,7 @@
     // Expect class to at least be initializing.
     DCHECK(called->GetDeclaringClass()->IsInitializing());
     // Don't want infinite recursion.
-    DCHECK(code != GetPortableResolutionTrampoline(linker));
+    DCHECK(code != linker->GetPortableResolutionTrampoline());
     // Set up entry into main method
     *called_addr = called;
   }
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index dde74de..1f2713a 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -17,7 +17,7 @@
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 
 #include "callee_save_frame.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 47fb9d6..f9f62c2 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -21,7 +21,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "stack.h"
 #include "thread.h"
 #include "verifier/method_verifier.h"
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 53c9b97..704db05 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -15,7 +15,7 @@
  */
 
 #include "callee_save_frame.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 032f6be..473687c 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -138,6 +138,10 @@
   void (*pThrowNoSuchMethod)(int32_t);
   void (*pThrowNullPointer)();
   void (*pThrowStackOverflow)(void*);
+
+  // Atomic 64-bit load/store
+  int64_t (*pA64Load)(volatile const int64_t *);
+  void (*pA64Store)(volatile int64_t *, int64_t);
 };
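
The two new slots give targets an out-of-line way to perform atomic 64-bit memory
accesses (useful on 32-bit ISAs without a native 64-bit atomic). The implementations
are not part of this hunk; a sketch of what functions with these signatures could
look like, using compiler atomic builtins and hypothetical names:

  #include <cstdint>

  extern "C" int64_t art_quick_a64_load(volatile const int64_t* addr) {
    // Hypothetical body: sequentially consistent 64-bit load.
    return __atomic_load_n(addr, __ATOMIC_SEQ_CST);
  }

  extern "C" void art_quick_a64_store(volatile int64_t* addr, int64_t value) {
    // Hypothetical body: sequentially consistent 64-bit store.
    __atomic_store_n(addr, value, __ATOMIC_SEQ_CST);
  }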
 
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 5cb0f36..cd1e247 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -16,7 +16,7 @@
 
 #include "callee_save_frame.h"
 #include "dex_file-inl.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index d161d0b..9a22c15 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -32,10 +32,15 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  const void* result = instrumentation->GetQuickCodeFor(method);
-  DCHECK(result != GetQuickToInterpreterBridgeTrampoline(Runtime::Current()->GetClassLinker()));
+  const void* result;
+  if (instrumentation->IsDeoptimized(method)) {
+    result = GetQuickToInterpreterBridge();
+  } else {
+    result = instrumentation->GetQuickCodeFor(method);
+  }
+  DCHECK(result != Runtime::Current()->GetClassLinker()->GetQuickToInterpreterBridgeTrampoline());
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
-  instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? NULL : this_object,
+  instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
                                                  method, lr, interpreter_entry);
   CHECK(result != NULL) << PrettyMethod(method);
   return result;
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 140b075..6537249 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -15,13 +15,12 @@
  */
 
 #include "dex_file-inl.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "verify_object-inl.h"
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index 5c48fc7..118cd7f 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -15,7 +15,7 @@
  */
 
 #include "callee_save_frame.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index e6f294a..879010e 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -16,9 +16,8 @@
 
 #include "callee_save_frame.h"
 #include "common_throws.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 95cb85e..338bd06 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -18,7 +18,7 @@
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "instruction_set.h"
 #include "interpreter/interpreter.h"
@@ -27,7 +27,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 
@@ -175,8 +174,8 @@
   static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80;  // Offset of first GPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80 + 4*8;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168 + 4*8;  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     switch (gpr_index) {
       case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA));
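
The "+ 4*8" in the two adjusted constants presumably accounts for four extra 8-byte
spill slots placed ahead of the GPR argument area, while the FPR offset at 16 is
untouched. As a quick arithmetic check of the resulting layout:

  static_assert(80 + 4 * 8 == 112, "first GPR argument now at offset 112");
  static_assert(168 + 4 * 8 == 200, "return address (LR) now at offset 200");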
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c572baf..ae1b94f 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -121,7 +121,7 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, kPointerSize);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, rosalloc_runs, kPointerSize);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, rosalloc_runs, thread_local_alloc_stack_top,
-                        kPointerSize * gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets);
+                        kPointerSize * kNumRosAllocThreadLocalSizeBrackets);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         kPointerSize);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, kPointerSize);
@@ -259,8 +259,10 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowDivZero, pThrowNoSuchMethod, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNoSuchMethod, pThrowNullPointer, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNullPointer, pThrowStackOverflow, kPointerSize);
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pA64Load, kPointerSize);
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, kPointerSize);
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pThrowStackOverflow)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pA64Store)
             + kPointerSize == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
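
These offset assertions pin the exact struct layout that assembly stubs index into.
In miniature, with a hypothetical struct standing in for QuickEntryPoints, the two
properties asserted for the new slots are adjacency and finality:

  #include <cstddef>

  struct Points {
    void* pA;  // Stand-ins for two adjacent entrypoint slots.
    void* pB;
  };

  // Adjacent slots must be exactly one pointer apart...
  static_assert(offsetof(Points, pB) - offsetof(Points, pA) == sizeof(void*),
                "slots are adjacent");
  // ...and the last slot must end the struct, mirroring the CHECKED(...) line.
  static_assert(offsetof(Points, pB) + sizeof(void*) == sizeof(Points),
                "pB is the final member");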
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 3112bc0..1b91628 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -15,23 +15,13 @@
  */
 
 #include "fault_handler.h"
+
 #include <sys/mman.h>
 #include <sys/ucontext.h>
-#include "base/macros.h"
-#include "globals.h"
-#include "base/logging.h"
-#include "base/hex_dump.h"
-#include "thread.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "object_utils.h"
-#include "scoped_thread_state_change.h"
-#ifdef HAVE_ANDROID_OS
+#include "mirror/art_method.h"
+#include "mirror/class.h"
 #include "sigchain.h"
-#endif
+#include "thread-inl.h"
 #include "verify_object-inl.h"
 
 namespace art {
@@ -47,6 +37,7 @@
 
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
+  // std::cout << "handling fault in ART handler\n";
   fault_manager.HandleFault(sig, info, context);
 }
 
@@ -55,10 +46,6 @@
 }
 
 FaultManager::~FaultManager() {
-#ifdef HAVE_ANDROID_OS
-  UnclaimSignalChain(SIGSEGV);
-#endif
-  sigaction(SIGSEGV, &oldaction_, nullptr);   // Restore old handler.
 }
 
 
@@ -72,11 +59,12 @@
 #endif
 
   // Set our signal handler now.
-  sigaction(SIGSEGV, &action, &oldaction_);
-#ifdef HAVE_ANDROID_OS
+  int e = sigaction(SIGSEGV, &action, &oldaction_);
+  if (e != 0) {
+    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
+  }
   // Make sure our signal handler is called before any user handlers.
   ClaimSignalChain(SIGSEGV, &oldaction_);
-#endif
 }
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
@@ -84,8 +72,12 @@
   //
   // If malloc calls abort, it will be holding its lock.
   // If the handler tries to call malloc, it will deadlock.
+
+  // Also, there is only an 8K stack available here, so logging can cause memory
+  // overwrite issues if you are unlucky.  If you want to enable logging and
+  // are getting crashes, allocate more space for the alternate signal stack.
   VLOG(signals) << "Handling fault";
-  if (IsInGeneratedCode(context, true)) {
+  if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
@@ -101,11 +93,8 @@
   }
   art_sigsegv_fault();
 
-#ifdef HAVE_ANDROID_OS
+  // Pass this on to the next handler in the chain, or the default if none.
   InvokeUserSignalHandler(sig, info, context);
-#else
-  oldaction_.sa_sigaction(sig, info, context);
-#endif
 }
 
 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
@@ -132,7 +121,7 @@
 
 // This function is called within the signal handler.  It checks that
 // the mutator_lock is held (shared).  No annotalysis is done.
-bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
+bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
   VLOG(signals) << "Checking for generated code";
@@ -161,7 +150,7 @@
 
   // Get the architecture specific method address and return address.  These
   // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
-  GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
+  GetMethodAndReturnPCAndSP(siginfo, context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
@@ -242,12 +231,12 @@
 
 bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  if (manager_->IsInGeneratedCode(context, false)) {
+  if (manager_->IsInGeneratedCode(siginfo, context, false)) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     mirror::ArtMethod* method = nullptr;
     uintptr_t return_pc = 0;
     uintptr_t sp = 0;
-    manager_->GetMethodAndReturnPCAndSP(context, &method, &return_pc, &sp);
+    manager_->GetMethodAndReturnPCAndSP(siginfo, context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     StackReference<mirror::ArtMethod>* frame =
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 026f5b9..71c9977 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -43,9 +43,10 @@
   void HandleFault(int sig, siginfo_t* info, void* context);
   void AddHandler(FaultHandler* handler, bool generated_code);
   void RemoveHandler(FaultHandler* handler);
-  void GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+  void GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context, mirror::ArtMethod** out_method,
                                  uintptr_t* out_return_pc, uintptr_t* out_sp);
-  bool IsInGeneratedCode(void *context, bool check_dex_pc) NO_THREAD_SAFETY_ANALYSIS;
+  bool IsInGeneratedCode(siginfo_t* siginfo, void *context, bool check_dex_pc)
+                         NO_THREAD_SAFETY_ANALYSIS;
 
  private:
   std::vector<FaultHandler*> generated_code_handlers_;
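
With the handler now forwarding every unhandled fault through sigchain's
InvokeUserSignalHandler, the chain must either call the previously installed
handler or fall back to the default action. A rough sketch of that dispatch shape
(illustrative only, not sigchain's actual implementation):

  #include <signal.h>

  static struct sigaction g_chained_action;  // Saved when the chain was claimed.

  static void InvokeChainedHandler(int sig, siginfo_t* info, void* context) {
    if ((g_chained_action.sa_flags & SA_SIGINFO) != 0) {
      g_chained_action.sa_sigaction(sig, info, context);
    } else if (g_chained_action.sa_handler == SIG_DFL) {
      signal(sig, SIG_DFL);  // Restore the default action and re-raise.
      raise(sig);
    } else if (g_chained_action.sa_handler != SIG_IGN) {
      g_chained_action.sa_handler(sig);
    }
  }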
diff --git a/runtime/field_helper.cc b/runtime/field_helper.cc
new file mode 100644
index 0000000..40daa6d
--- /dev/null
+++ b/runtime/field_helper.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "field_helper.h"
+
+#include "class_linker-inl.h"
+#include "dex_file.h"
+#include "mirror/dex_cache.h"
+#include "runtime.h"
+#include "thread-inl.h"
+
+namespace art {
+
+mirror::Class* FieldHelper::GetType(bool resolve) {
+  uint32_t field_index = field_->GetDexFieldIndex();
+  if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
+    return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
+                                                                 field_->GetTypeDescriptor());
+  }
+  const DexFile* dex_file = field_->GetDexFile();
+  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+  mirror::Class* type = field_->GetDexCache()->GetResolvedType(field_id.type_idx_);
+  if (resolve && (type == nullptr)) {
+    type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, field_.Get());
+    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
+  }
+  return type;
+}
+
+const char* FieldHelper::GetDeclaringClassDescriptor() {
+  uint32_t field_index = field_->GetDexFieldIndex();
+  if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
+    DCHECK(field_->IsStatic());
+    DCHECK_LT(field_index, 2U);
+    // 0 == Class[] interfaces; 1 == Class[][] throws;
+    declaring_class_descriptor_ = field_->GetDeclaringClass()->GetDescriptor();
+    return declaring_class_descriptor_.c_str();
+  }
+  const DexFile* dex_file = field_->GetDexFile();
+  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+  return dex_file->GetFieldDeclaringClassDescriptor(field_id);
+}
+
+}  // namespace art
diff --git a/runtime/field_helper.h b/runtime/field_helper.h
new file mode 100644
index 0000000..5eae55e
--- /dev/null
+++ b/runtime/field_helper.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_FIELD_HELPER_H_
+#define ART_RUNTIME_FIELD_HELPER_H_
+
+#include "base/macros.h"
+#include "handle.h"
+#include "mirror/art_field.h"
+
+namespace art {
+
+class FieldHelper {
+ public:
+  explicit FieldHelper(Handle<mirror::ArtField> f) : field_(f) {}
+
+  void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(new_f != nullptr);
+    field_.Assign(new_f);
+  }
+
+  mirror::ArtField* GetField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return field_.Get();
+  }
+
+  mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
+  // If you need it longer, copy it into a std::string.
+  const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  Handle<mirror::ArtField> field_;
+  std::string declaring_class_descriptor_;
+
+  DISALLOW_COPY_AND_ASSIGN(FieldHelper);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_FIELD_HELPER_H_
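
A short usage sketch assembled from the declarations above; it assumes the caller
holds the mutator lock and has a mirror::ArtField* named field in scope:

  StackHandleScope<1> hs(Thread::Current());
  FieldHelper fh(hs.NewHandle(field));
  mirror::Class* type = fh.GetType();  // May resolve the type; can leave an exception pending.
  const char* descriptor = fh.GetDeclaringClassDescriptor();
  // Per the header comment, descriptor is only valid while fh is alive; copy it
  // into a std::string if it must outlive the helper.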
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index ad0a4f43..46b9363 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -50,8 +50,9 @@
 template <typename Visitor>
 inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
                               const Visitor& visitor, const byte minimum_age) const {
-  DCHECK(bitmap->HasAddress(scan_begin));
-  DCHECK(bitmap->HasAddress(scan_end - 1));  // scan_end is the byte after the last byte we scan.
+  DCHECK_GE(scan_begin, reinterpret_cast<byte*>(bitmap->HeapBegin()));
+  // scan_end is the byte after the last byte we scan.
+  DCHECK_LE(scan_end, reinterpret_cast<byte*>(bitmap->HeapLimit()));
   byte* card_cur = CardFromAddr(scan_begin);
   byte* card_end = CardFromAddr(scan_end);
   CheckCardValid(card_cur);
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index a95c003..ceb42e5 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -83,8 +83,6 @@
 
 CardTable::CardTable(MemMap* mem_map, byte* biased_begin, size_t offset)
     : mem_map_(mem_map), biased_begin_(biased_begin), offset_(offset) {
-  byte* __attribute__((unused)) begin = mem_map_->Begin() + offset_;
-  byte* __attribute__((unused)) end = mem_map_->End();
 }
 
 void CardTable::ClearSpaceCards(space::ContinuousSpace* space) {
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 228d1dc..2686af0 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -185,7 +185,7 @@
           << from_space->GetGcRetentionPolicy();
       LOG(INFO) << "ToSpace " << to_space->GetName() << " type "
           << to_space->GetGcRetentionPolicy();
-      heap->DumpSpaces();
+      heap->DumpSpaces(LOG(INFO));
       LOG(FATAL) << "FATAL ERROR";
     }
   }
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 1e9556a..fc4213e 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -23,14 +23,6 @@
 
 #include "atomic.h"
 #include "base/logging.h"
-#include "dex_file-inl.h"
-#include "heap_bitmap.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
-#include "object_utils.h"
-#include "space_bitmap-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index c0aa43e..39d1f9e 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -16,6 +16,13 @@
 
 #include "space_bitmap-inl.h"
 
+#include "base/stringprintf.h"
+#include "mem_map.h"
+#include "mirror/object-inl.h"
+#include "mirror/class.h"
+#include "mirror/art_field.h"
+#include "mirror/object_array.h"
+
 namespace art {
 namespace gc {
 namespace accounting {
@@ -46,6 +53,9 @@
 }
 
 template<size_t kAlignment>
+SpaceBitmap<kAlignment>::~SpaceBitmap() {}
+
+template<size_t kAlignment>
 SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::Create(
     const std::string& name, byte* heap_begin, size_t heap_capacity) {
   // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
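
Declaring ~SpaceBitmap() in the header but defining it here lets space_bitmap.h
shed heavy includes: member types only need to be complete in the translation unit
that actually destroys them. The idiom in miniature, with hypothetical names:

  #include <memory>

  class Gadget;  // Forward declaration suffices in the header...

  class Widget {
   public:
    Widget();
    ~Widget();  // ...because the destructor is defined out of line.
   private:
    std::unique_ptr<Gadget> gadget_;  // Gadget must be complete only at ~Widget().
  };

  // widget.cc:
  //   #include "gadget.h"
  //   Widget::~Widget() = default;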
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 6d1ba87..a3073bd 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -54,8 +54,7 @@
   static SpaceBitmap* CreateFromMemMap(const std::string& name, MemMap* mem_map,
                                        byte* heap_begin, size_t heap_capacity);
 
-  ~SpaceBitmap() {
-  }
+  ~SpaceBitmap();
 
   // <offset> is the difference from .base to a pointer address.
   // <index> is the index of .bits that contains the bit representing
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 722576f..ad22a2e 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -68,9 +68,9 @@
              << ", capacity=" << std::dec << capacity_
              << ", max_capacity=" << std::dec << max_capacity_;
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    size_bracket_lock_names[i] =
+    size_bracket_lock_names_[i] =
         StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
-    size_bracket_locks_[i] = new Mutex(size_bracket_lock_names[i].c_str(), kRosAllocBracketLock);
+    size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
     current_runs_[i] = dedicated_full_run_;
   }
   DCHECK_EQ(footprint_, capacity_);
@@ -2112,30 +2112,40 @@
     // result in occasionally not releasing pages which we could release.
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall through.
       case kPageMapEmpty: {
-        // Only lock if we have an empty page since we want to prevent other threads racing in.
+        // This is currently the start of a free page run.
+        // Acquire the lock to prevent other threads racing in and modifying the page map.
         MutexLock mu(self, lock_);
         // Check that it's still empty after we acquired the lock since another thread could have
         // raced in and placed an allocation here.
-        pm = page_map_[i];
-        if (LIKELY(pm == kPageMapEmpty)) {
-          // The start of a free page run. Release pages.
+        if (IsFreePage(i)) {
+          // Free page runs can start with a released page if we coalesced a free page
+          // run of released pages with an empty page run.
           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
-          DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
-          size_t fpr_size = fpr->ByteSize(this);
-          DCHECK(IsAligned<kPageSize>(fpr_size));
-          byte* start = reinterpret_cast<byte*>(fpr);
-          reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
-          i += fpr_size / kPageSize;
-          DCHECK_LE(i, page_map_size_);
+          // There is a race condition where FreePage can coalesce fpr with the previous
+          // free page run before we acquire lock_. In that case free_page_runs_.find will not find
+          // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
+          // to the next page.
+          if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+            size_t fpr_size = fpr->ByteSize(this);
+            DCHECK(IsAligned<kPageSize>(fpr_size));
+            byte* start = reinterpret_cast<byte*>(fpr);
+            reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+            size_t pages = fpr_size / kPageSize;
+            CHECK_GT(pages, 0U) << "Infinite loop probable";
+            i += pages;
+            DCHECK_LE(i, page_map_size_);
+            break;
+          }
         }
-        break;
+        // Fall through.
       }
       case kPageMapLargeObject:      // Fall through.
       case kPageMapLargeObjectPart:  // Fall through.
       case kPageMapRun:              // Fall through.
       case kPageMapRunPart:          // Fall through.
-      case kPageMapReleased:         // Fall through since it is already released.
         ++i;
         break;  // Skip.
       default:
@@ -2175,6 +2185,34 @@
   return reclaimed_bytes;
 }
 
+void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
+  Thread* self = Thread::Current();
+  size_t largest_continuous_free_pages = 0;
+  WriterMutexLock wmu(self, bulk_free_lock_);
+  MutexLock mu(self, lock_);
+  for (FreePageRun* fpr : free_page_runs_) {
+    largest_continuous_free_pages = std::max(largest_continuous_free_pages,
+                                             fpr->ByteSize(this));
+  }
+  if (failed_alloc_bytes > kLargeSizeThreshold) {
+    // Large allocation.
+    size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
+    if (required_bytes > largest_continuous_free_pages) {
+      os << "; failed due to fragmentation (required contiguous free "
+         << required_bytes << " bytes where largest contiguous free "
+         <<  largest_continuous_free_pages << " bytes)";
+    }
+  } else {
+    // Non-large allocation.
+    size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
+    if (required_bytes > largest_continuous_free_pages) {
+      os << "; failed due to fragmentation (required contiguous free "
+         << required_bytes << " bytes for a new buffer where largest contiguous free "
+         <<  largest_continuous_free_pages << " bytes)";
+    }
+  }
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
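
A worked instance of the large-allocation branch in LogFragmentationAllocFailure,
under the assumption of 4 KiB pages and an arbitrary oversized request:

  #include <cstddef>

  constexpr size_t kExamplePageSize = 4096;    // Assumed page size.
  constexpr size_t kFailedAllocBytes = 40000;  // Hypothetical large request.
  constexpr size_t kRequiredBytes =
      ((kFailedAllocBytes + kExamplePageSize - 1) / kExamplePageSize) * kExamplePageSize;
  static_assert(kRequiredBytes == 40960, "40000 bytes rounds up to ten 4 KiB pages");
  // If the largest contiguous free page run is, say, 32768 bytes, the message
  // above fires even though total free memory may well exceed 40960 bytes.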
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index fad0dc8..b2a5a3c 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -30,6 +30,7 @@
 #include "base/logging.h"
 #include "globals.h"
 #include "mem_map.h"
+#include "thread.h"
 #include "utils.h"
 
 namespace art {
@@ -261,7 +262,7 @@
   // The magic number for free pages.
   static const byte kMagicNumFree = 43;
   // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
-  static const size_t kNumOfSizeBrackets = 34;
+  static const size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
   // The number of smaller size brackets that are 16 bytes apart.
   static const size_t kNumOfQuantumSizeBrackets = 32;
   // The sizes (the slot sizes, in bytes) of the size brackets.
@@ -440,7 +441,7 @@
   // The mutexes, one per size bracket.
   Mutex* size_bracket_locks_[kNumOfSizeBrackets];
   // Bracket lock names (since locks only have char* names).
-  std::string size_bracket_lock_names[kNumOfSizeBrackets];
+  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
   // The types of page map entries.
   enum {
     kPageMapReleased = 0,     // Zero and released back to the OS.
@@ -590,6 +591,8 @@
 
   // Verify for debugging.
   void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes);
 };
 
 }  // namespace allocator
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 0c0a89b..277525e 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -19,7 +19,6 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "gc/space/space-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 25cfe0f..bb85fa0 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -49,6 +49,7 @@
 }  // namespace accounting
 
 namespace space {
+  class BumpPointerSpace;
   class ContinuousMemMapAllocSpace;
   class ContinuousSpace;
 }  // namespace space
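
The two header hunks above apply the same dependency-trimming pattern: immune_region.h
drops an include it no longer needs, and mark_compact.h forward-declares BumpPointerSpace
instead of pulling in its full definition, cutting rebuild fan-out. A minimal sketch of
the pattern (MyCollector is a hypothetical example, not code from this change):

    // my_collector.h
    namespace art {
    namespace gc {
    namespace space {
    class BumpPointerSpace;  // Forward declaration instead of a heavy #include.
    }  // namespace space

    class MyCollector {
     public:
      explicit MyCollector(space::BumpPointerSpace* space) : space_(space) {}

     private:
      // A pointer member only needs the type declared, not defined.
      space::BumpPointerSpace* space_;
    };
    }  // namespace gc
    }  // namespace art

The corresponding .cc file then includes bump_pointer_space.h before dereferencing the
pointer.
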
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 974952d..104ed36 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -32,10 +32,7 @@
 template<typename MarkVisitor, typename ReferenceVisitor>
 inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
                                        const ReferenceVisitor& ref_visitor) {
-  if (kIsDebugBuild && !IsMarked(obj)) {
-    heap_->DumpSpaces();
-    LOG(FATAL) << "Scanning unmarked object " << obj;
-  }
+  DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
   obj->VisitReferences<false>(visitor, ref_visitor);
   if (kCountScannedTypes) {
     mirror::Class* klass = obj->GetClass<kVerifyNone>();
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 7e97b3b..95530be 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -313,10 +313,8 @@
       }
     }
   }
-  if (current_space_bitmap_ == nullptr) {
-    heap_->DumpSpaces();
-    LOG(FATAL) << "Could not find a default mark bitmap";
-  }
+  CHECK(current_space_bitmap_ != nullptr) << "Could not find a default mark bitmap\n"
+      << heap_->DumpSpaces();
 }
 
 void MarkSweep::ExpandMarkStack() {
@@ -943,12 +941,9 @@
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
   if (!heap_->GetLiveBitmap()->Test(obj)) {
-    if (std::find(heap_->allocation_stack_->Begin(), heap_->allocation_stack_->End(), obj) ==
-        heap_->allocation_stack_->End()) {
-      // Object not found!
-      heap_->DumpSpaces();
-      LOG(FATAL) << "Found dead object " << obj;
-    }
+    accounting::ObjectStack* allocation_stack = heap_->allocation_stack_.get();
+    CHECK(std::find(allocation_stack->Begin(), allocation_stack->End(), obj) !=
+        allocation_stack->End()) << "Found dead object " << obj << "\n" << heap_->DumpSpaces();
   }
 }
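
Several hunks in this file and mark_sweep-inl.h collapse the dump-then-LOG(FATAL) pattern
into a single CHECK/DCHECK whose failure message streams heap_->DumpSpaces(); this relies
on the DumpSpaces() overload added in heap.cc below that returns a std::string. A toy
model of why the dump stays free on the non-failing path (CheckOrDie and DumpState are
hypothetical stand-ins for ART's CHECK() and Heap::DumpSpaces()):

    #include <cstdlib>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Stand-in for Heap::DumpSpaces(): expensive to build, so it should only run
    // when a check actually fails.
    std::string DumpState() {
      std::ostringstream oss;
      oss << "<space and bitmap details>";
      return oss.str();
    }

    // Toy check: the message callback is only invoked on failure, mirroring how
    // CHECK(...) << ... only evaluates its stream operands on the failing path.
    template <typename MsgFn>
    void CheckOrDie(bool condition, MsgFn build_message) {
      if (!condition) {
        std::cerr << "Check failed: " << build_message() << std::endl;
        std::abort();
      }
    }

    // Usage mirroring the hunks above:
    //   CheckOrDie(IsMarked(obj), [&] { return "Scanning unmarked object\n" + DumpState(); });
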
 
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 47682cc..922a71c 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -64,34 +64,25 @@
     // Verify all the objects have the correct forward pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (!immune_region_.ContainsObject(obj)) {
-    if (from_space_->HasAddress(obj)) {
-      mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
-      // If the object has already been moved, return the new forward address.
-      if (UNLIKELY(forward_address == nullptr)) {
-        forward_address = MarkNonForwardedObject(obj);
-        DCHECK(forward_address != nullptr);
-        // Make sure to only update the forwarding address AFTER you copy the object so that the
-        // monitor word doesn't Get stomped over.
-        obj->SetLockWord(
-            LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)), false);
-        // Push the object onto the mark stack for later processing.
-        MarkStackPush(forward_address);
-      }
-      obj_ptr->Assign(forward_address);
-    } else {
-      BitmapSetSlowPathVisitor visitor(this);
-      if (kIsDebugBuild && mark_bitmap_->GetContinuousSpaceBitmap(obj) != nullptr) {
-        // If a bump pointer space only collection, we should not
-        // reach here as we don't/won't mark the objects in the
-        // non-moving space (except for the promoted objects.)  Note
-        // the non-moving space is added to the immune space.
-        DCHECK(!generational_ || whole_heap_collection_);
-      }
-      if (!mark_bitmap_->Set(obj, visitor)) {
-        // This object was not previously marked.
-        MarkStackPush(obj);
-      }
+  if (from_space_->HasAddress(obj)) {
+    mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
+    // If the object has already been moved, return the new forward address.
+    if (UNLIKELY(forward_address == nullptr)) {
+      forward_address = MarkNonForwardedObject(obj);
+      DCHECK(forward_address != nullptr);
+      // Make sure to only update the forwarding address AFTER you copy the object so that the
+      // monitor word doesn't get stomped over.
+      obj->SetLockWord(
+          LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)), false);
+      // Push the object onto the mark stack for later processing.
+      MarkStackPush(forward_address);
+    }
+    obj_ptr->Assign(forward_address);
+  } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) {
+    BitmapSetSlowPathVisitor visitor(this);
+    if (!mark_bitmap_->Set(obj, visitor)) {
+      // This object was not previously marked.
+      MarkStackPush(obj);
     }
   }
 }
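
The refactored MarkObject() above keeps the copy-then-publish ordering the comment calls
out. A toy model of the forwarding protocol (ToyObject is hypothetical; ART packs the
forwarding address into the object's LockWord, simplified here to "zero means not yet
forwarded"):

    #include <cstdint>
    #include <cstring>

    // First word doubles as a lock word or a forwarding address.
    struct ToyObject {
      uintptr_t lock_word;  // Assumed zeroed for unforwarded objects.
      char payload[24];
    };

    ToyObject* Forward(ToyObject* from, ToyObject* to_space_slot) {
      if (from->lock_word != 0) {
        // Already moved: the old copy's lock word holds the new address.
        return reinterpret_cast<ToyObject*>(from->lock_word);
      }
      // Copy the object first...
      std::memcpy(to_space_slot, from, sizeof(ToyObject));
      // ...then publish the forwarding address, so no reader can ever follow a
      // pointer to a half-copied object (the AFTER in the comment above).
      from->lock_word = reinterpret_cast<uintptr_t>(to_space_slot);
      return to_space_slot;
    }
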
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index cabfe21..c7c567f 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -63,23 +63,23 @@
   WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetLiveBitmap() != nullptr) {
-      if (space == to_space_) {
-        CHECK(to_space_->IsContinuousMemMapAllocSpace());
-        to_space_->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
-      } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-                 || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect
-                 // Add the main free list space and the non-moving
-                 // space to the immune space if a bump pointer space
-                 // only collection.
-                 || (generational_ && !whole_heap_collection_ &&
-                     (space == GetHeap()->GetNonMovingSpace() ||
-                      space == GetHeap()->GetPrimaryFreeListSpace()))) {
-        CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+    } else if (space->GetLiveBitmap() != nullptr) {
+      if (space == to_space_ || collect_from_space_only_) {
+        if (collect_from_space_only_) {
+          // Bind the main free list space and the non-moving space to the immune space if a bump
+          // pointer space only collection.
+          CHECK(space == to_space_ || space == GetHeap()->GetPrimaryFreeListSpace() ||
+                space == GetHeap()->GetNonMovingSpace());
+        }
+        CHECK(space->IsContinuousMemMapAllocSpace());
+        space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
       }
     }
   }
-  if (generational_ && !whole_heap_collection_) {
+  if (collect_from_space_only_) {
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
   }
@@ -95,7 +95,7 @@
       bytes_promoted_(0),
       bytes_promoted_since_last_whole_heap_collection_(0),
       large_object_bytes_allocated_at_last_whole_heap_collection_(0),
-      whole_heap_collection_(true),
+      collect_from_space_only_(generational),
       collector_name_(name_),
       swap_semi_spaces_(true) {
 }
@@ -147,6 +147,10 @@
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     mark_bitmap_ = heap_->GetMarkBitmap();
   }
+  if (generational_) {
+    promo_dest_space_ = GetHeap()->GetPrimaryFreeListSpace();
+  }
+  fallback_space_ = GetHeap()->GetNonMovingSpace();
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
@@ -180,9 +184,9 @@
         GetCurrentIteration()->GetClearSoftReferences()) {
       // If an explicit, native allocation-triggered, or last attempt
       // collection, collect the whole heap.
-      whole_heap_collection_ = true;
+      collect_from_space_only_ = false;
     }
-    if (whole_heap_collection_) {
+    if (!collect_from_space_only_) {
       VLOG(heap) << "Whole heap collection";
       name_ = collector_name_ + " whole";
     } else {
@@ -191,7 +195,7 @@
     }
   }
 
-  if (!generational_ || whole_heap_collection_) {
+  if (!collect_from_space_only_) {
     // If non-generational, always clear soft references.
     // If generational, clear soft references if a whole heap collection.
     GetCurrentIteration()->SetClearSoftReferences(true);
@@ -227,8 +231,6 @@
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     MarkRoots();
-    // Mark roots of immune spaces.
-    UpdateAndMarkModUnion();
     // Recursively mark remaining objects.
     MarkReachableObjects();
   }
@@ -259,46 +261,6 @@
   }
 }
 
-void SemiSpace::UpdateAndMarkModUnion() {
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    // If the space is immune then we need to mark the references to other spaces.
-    if (immune_region_.ContainsSpace(space)) {
-      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
-      if (table != nullptr) {
-        // TODO: Improve naming.
-        TimingLogger::ScopedTiming t(
-            space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
-                                     "UpdateAndMarkImageModUnionTable",
-                                     GetTimings());
-        table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
-      } else if (heap_->FindRememberedSetFromSpace(space) != nullptr) {
-        DCHECK(kUseRememberedSet);
-        // If a bump pointer space only collection, the non-moving
-        // space is added to the immune space. The non-moving space
-        // doesn't have a mod union table, but has a remembered
-        // set. Its dirty cards will be scanned later in
-        // MarkReachableObjects().
-        DCHECK(generational_ && !whole_heap_collection_ &&
-               (space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace()))
-            << "Space " << space->GetName() << " "
-            << "generational_=" << generational_ << " "
-            << "whole_heap_collection_=" << whole_heap_collection_ << " ";
-      } else {
-        DCHECK(!kUseRememberedSet);
-        // If a bump pointer space only collection, the non-moving
-        // space is added to the immune space. But the non-moving
-        // space doesn't have a mod union table. Instead, its live
-        // bitmap will be scanned later in MarkReachableObjects().
-        DCHECK(generational_ && !whole_heap_collection_ &&
-               (space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace()))
-            << "Space " << space->GetName() << " "
-            << "generational_=" << generational_ << " "
-            << "whole_heap_collection_=" << whole_heap_collection_ << " ";
-      }
-    }
-  }
-}
-
 class SemiSpaceScanObjectVisitor {
  public:
   explicit SemiSpaceScanObjectVisitor(SemiSpace* ss) : semi_space_(ss) {}
@@ -355,20 +317,30 @@
     heap_->MarkAllocStackAsLive(live_stack);
     live_stack->Reset();
   }
-  t.NewTiming("UpdateAndMarkRememberedSets");
   for (auto& space : heap_->GetContinuousSpaces()) {
-    // If the space is immune and has no mod union table (the
-    // non-moving space when the bump pointer space only collection is
-    // enabled,) then we need to scan its live bitmap or dirty cards as roots
-    // (including the objects on the live stack which have just marked
-    // in the live bitmap above in MarkAllocStackAsLive().)
-    if (immune_region_.ContainsSpace(space) &&
-        heap_->FindModUnionTableFromSpace(space) == nullptr) {
-      DCHECK(generational_ && !whole_heap_collection_ &&
-             (space == GetHeap()->GetNonMovingSpace() || space == GetHeap()->GetPrimaryFreeListSpace()));
-      accounting::RememberedSet* rem_set = heap_->FindRememberedSetFromSpace(space);
-      if (kUseRememberedSet) {
-        DCHECK(rem_set != nullptr);
+    // If the space is immune then we need to mark the references to other spaces.
+    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+    if (table != nullptr) {
+      // TODO: Improve naming.
+      TimingLogger::ScopedTiming t2(
+          space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+                                   "UpdateAndMarkImageModUnionTable",
+                                   GetTimings());
+      table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+      DCHECK(GetHeap()->FindRememberedSetFromSpace(space) == nullptr);
+    } else if (collect_from_space_only_ && space->GetLiveBitmap() != nullptr) {
+      // If the space has no mod union table (the non-moving space and main spaces when the bump
+      // pointer space only collection is enabled), then we need to scan its live bitmap or dirty
+      // cards as roots (including the objects on the live stack, which have just been marked in
+      // the live bitmap above in MarkAllocStackAsLive()).
+      DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
+          << "Space " << space->GetName() << " "
+          << "generational_=" << generational_ << " "
+          << "collect_from_space_only_=" << collect_from_space_only_;
+      accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
+      CHECK_EQ(rem_set != nullptr, kUseRememberedSet);
+      if (rem_set != nullptr) {
+        TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(MarkHeapReferenceCallback, DelayReferenceReferentCallback,
                                          from_space_, this);
         if (kIsDebugBuild) {
@@ -383,7 +355,7 @@
                                         visitor);
         }
       } else {
-        DCHECK(rem_set == nullptr);
+        TimingLogger::ScopedTiming t2("VisitLiveBits", GetTimings());
         accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
         SemiSpaceScanObjectVisitor visitor(this);
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
@@ -393,9 +365,10 @@
     }
   }
 
+  CHECK_EQ(is_large_object_space_immune_, collect_from_space_only_);
   if (is_large_object_space_immune_) {
     TimingLogger::ScopedTiming t("VisitLargeObjects", GetTimings());
-    DCHECK(generational_ && !whole_heap_collection_);
+    DCHECK(collect_from_space_only_);
     // Delay copying the live set to the marked set until here from
     // BindBitmaps() as the large objects on the allocation stack may
     // be newly added to the live set above in MarkAllocStackAsLive().
@@ -506,19 +479,20 @@
 }
 
 mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
-  size_t object_size = obj->SizeOf();
+  const size_t object_size = obj->SizeOf();
   size_t bytes_allocated;
   mirror::Object* forward_address = nullptr;
   if (generational_ && reinterpret_cast<byte*>(obj) < last_gc_to_space_end_) {
     // If it's allocated before the last GC (older), move
     // (pseudo-promote) it to the main free list space (as sort
     // of an old generation.)
-    space::MallocSpace* promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
-    forward_address = promo_dest_space->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
-                                                          nullptr);
+    forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
+                                                           nullptr);
     if (UNLIKELY(forward_address == nullptr)) {
       // If out of space, fall back to the to-space.
       forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
+      // No logic for marking the bitmap, so it must be null.
+      DCHECK(to_space_->GetLiveBitmap() == nullptr);
     } else {
       bytes_promoted_ += bytes_allocated;
       // Dirty the card at the destination as it may contain
@@ -526,12 +500,12 @@
       // space.
       GetHeap()->WriteBarrierEveryFieldOf(forward_address);
       // Handle the bitmaps marking.
-      accounting::ContinuousSpaceBitmap* live_bitmap = promo_dest_space->GetLiveBitmap();
+      accounting::ContinuousSpaceBitmap* live_bitmap = promo_dest_space_->GetLiveBitmap();
       DCHECK(live_bitmap != nullptr);
-      accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+      accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space_->GetMarkBitmap();
       DCHECK(mark_bitmap != nullptr);
       DCHECK(!live_bitmap->Test(forward_address));
-      if (!whole_heap_collection_) {
+      if (collect_from_space_only_) {
         // If collecting the bump pointer spaces only, live_bitmap == mark_bitmap.
         DCHECK_EQ(live_bitmap, mark_bitmap);
 
@@ -559,12 +533,23 @@
         mark_bitmap->Set(forward_address);
       }
     }
-    DCHECK(forward_address != nullptr);
   } else {
     // If it's allocated after the last GC (younger), copy it to the to-space.
     forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
+    if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) {
+      to_space_live_bitmap_->Set(forward_address);
+    }
   }
-  CHECK(forward_address != nullptr) << "Out of memory in the to-space.";
+  // If it's still null, attempt to use the fallback space.
+  if (UNLIKELY(forward_address == nullptr)) {
+    forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
+                                                         nullptr);
+    CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space.";
+    accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap();
+    if (bitmap != nullptr) {
+      bitmap->Set(forward_address);
+    }
+  }
   ++objects_moved_;
   bytes_moved_ += bytes_allocated;
   // Copy over the object and add it to the mark stack since we still need to update its
@@ -579,11 +564,10 @@
     }
     forward_address->AssertReadBarrierPointer();
   }
-  if (to_space_live_bitmap_ != nullptr) {
-    to_space_live_bitmap_->Set(forward_address);
-  }
   DCHECK(to_space_->HasAddress(forward_address) ||
-         (generational_ && GetHeap()->GetPrimaryFreeListSpace()->HasAddress(forward_address)));
+         fallback_space_->HasAddress(forward_address) ||
+         (generational_ && promo_dest_space_->HasAddress(forward_address)))
+      << forward_address << "\n" << GetHeap()->DumpSpaces();
   return forward_address;
 }
 
@@ -648,7 +632,7 @@
 }
 
 bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
-  return space != from_space_ && space != to_space_ && !immune_region_.ContainsSpace(space);
+  return space != from_space_ && space != to_space_;
 }
 
 void SemiSpace::Sweep(bool swap_bitmaps) {
@@ -714,22 +698,20 @@
 // Scan anything that's on the mark stack.
 void SemiSpace::ProcessMarkStack() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  space::MallocSpace* promo_dest_space = nullptr;
   accounting::ContinuousSpaceBitmap* live_bitmap = nullptr;
-  if (generational_ && !whole_heap_collection_) {
+  if (collect_from_space_only_) {
     // If a bump pointer space only collection (and the promotion is
     // enabled,) we delay the live-bitmap marking of promoted objects
     // from MarkObject() until this function.
-    promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
-    live_bitmap = promo_dest_space->GetLiveBitmap();
+    live_bitmap = promo_dest_space_->GetLiveBitmap();
     DCHECK(live_bitmap != nullptr);
-    accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space_->GetMarkBitmap();
     DCHECK(mark_bitmap != nullptr);
     DCHECK_EQ(live_bitmap, mark_bitmap);
   }
   while (!mark_stack_->IsEmpty()) {
     Object* obj = mark_stack_->PopBack();
-    if (generational_ && !whole_heap_collection_ && promo_dest_space->HasAddress(obj)) {
+    if (collect_from_space_only_ && promo_dest_space_->HasAddress(obj)) {
       // obj has just been promoted. Mark the live bitmap for it,
       // which is delayed from MarkObject().
       DCHECK(!live_bitmap->Test(obj));
@@ -742,16 +724,12 @@
 inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
   // All immune objects are assumed marked.
-  if (immune_region_.ContainsObject(obj)) {
-    return obj;
-  }
   if (from_space_->HasAddress(obj)) {
     // Returns either the forwarding address or nullptr.
     return GetForwardingAddressInFromSpace(obj);
-  } else if (to_space_->HasAddress(obj)) {
-    // Should be unlikely.
-    // Already forwarded, must be marked.
-    return obj;
+  } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) ||
+             to_space_->HasAddress(obj)) {
+    return obj;  // Already forwarded, must be marked.
   }
   return mark_bitmap_->Test(obj) ? obj : nullptr;
 }
@@ -777,9 +755,9 @@
   if (generational_) {
     // Decide whether to do a whole heap collection or a bump pointer
     // only space collection at the next collection by updating
-    // whole_heap_collection.
-    if (!whole_heap_collection_) {
-      // Enable whole_heap_collection if the bytes promoted since the
+    // collect_from_space_only_.
+    if (collect_from_space_only_) {
+      // Disable collect_from_space_only_ if the bytes promoted since the
       // last whole heap collection or the large object bytes
       // allocated exceeds a threshold.
       bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
@@ -792,14 +770,14 @@
           current_los_bytes_allocated >=
           last_los_bytes_allocated + kLargeObjectBytesAllocatedThreshold;
       if (bytes_promoted_threshold_exceeded || large_object_bytes_threshold_exceeded) {
-        whole_heap_collection_ = true;
+        collect_from_space_only_ = false;
       }
     } else {
       // Reset the counters.
       bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
       large_object_bytes_allocated_at_last_whole_heap_collection_ =
           GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
-      whole_heap_collection_ = false;
+      collect_from_space_only_ = true;
     }
   }
   // Clear all of the spaces' mark bitmaps.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index bff0847..71a83f2 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -25,6 +25,7 @@
 #include "garbage_collector.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_region.h"
+#include "mirror/object_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -243,9 +244,14 @@
   // large objects were allocated at the last whole heap collection.
   uint64_t large_object_bytes_allocated_at_last_whole_heap_collection_;
 
-  // Used for the generational mode. When true, collect the whole
-  // heap. When false, collect only the bump pointer spaces.
-  bool whole_heap_collection_;
+  // Used for generational mode. When true, we only collect the from_space_.
+  bool collect_from_space_only_;
+
+  // The space which we are promoting into; only used for GSS.
+  space::ContinuousMemMapAllocSpace* promo_dest_space_;
+
+  // The space which we copy to if the to_space_ is full.
+  space::ContinuousMemMapAllocSpace* fallback_space_;
 
   // How many objects and bytes we moved, used so that we don't need to Get the size of the
   // to_space_ when calculating how many objects and bytes we freed.
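
The new promo_dest_space_ and fallback_space_ members support the allocation order that
MarkNonForwardedObject() in semi_space.cc now follows. A sketch of that order under
simplified, hypothetical types:

    #include <cstddef>

    struct Space {
      virtual void* Alloc(size_t num_bytes) = 0;  // Returns nullptr when full.
      virtual ~Space() {}
    };

    // Copy destination selection: promotion space for old objects, then the
    // to-space, then the fallback (non-moving) space before declaring OOM.
    void* ChooseCopyTarget(Space* promo_dest_space, Space* to_space, Space* fallback_space,
                           size_t object_size, bool allocated_before_last_gc) {
      void* addr = nullptr;
      if (allocated_before_last_gc) {
        addr = promo_dest_space->Alloc(object_size);  // Pseudo-promote survivors.
      }
      if (addr == nullptr) {
        addr = to_space->Alloc(object_size);          // Normal semi-space copy.
      }
      if (addr == nullptr) {
        addr = fallback_space->Alloc(object_size);    // New: last resort, not a hard OOM.
      }
      return addr;  // Caller CHECKs non-null; OOM only if even the fallback fails.
    }
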
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 530a3c9..ef5d56e 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -40,6 +40,9 @@
   kCollectorTypeHeapTrim,
   // A (mostly) concurrent copying collector.
   kCollectorTypeCC,
+  // A homogeneous space compaction collector used in background transition
+  // when both the foreground and background collectors are CMS.
+  kCollectorTypeHomogeneousSpaceCompact,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 9e73f14..f0e1512 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -31,6 +31,7 @@
     case kGcCauseForNativeAlloc: return "NativeAlloc";
     case kGcCauseCollectorTransition: return "CollectorTransition";
     case kGcCauseDisableMovingGc: return "DisableMovingGc";
+    case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
     case kGcCauseTrim: return "HeapTrim";
     default:
       LOG(FATAL) << "Unreachable";
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 10e6667..1f2643a 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -39,6 +39,8 @@
   kGcCauseDisableMovingGc,
   // Not a real GC cause, used when we trim the heap.
   kGcCauseTrim,
+  // GC triggered for background transition when both foreground and background collectors are CMS.
+  kGcCauseHomogeneousSpaceCompact,
 };
 
 const char* PrettyCause(GcCause cause);
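
The two new enum values above drive a one-shot defragmentation rather than a collector
switch. A rough sketch, under hypothetical simplified types, of the flow that heap.cc's
PerformHomogeneousSpaceCompact() implements further down: copy the main space into a
same-sized backup mapping, then swap the two.

    #include <cstddef>
    #include <utility>

    enum class HomogeneousSpaceCompactResult { kSuccess, kErrorReject, kErrorVMShuttingDown };

    struct Space { /* main or backup malloc space */ };

    HomogeneousSpaceCompactResult Compact(Space** main, Space** backup,
                                          bool moving_gc_disabled) {
      if (moving_gc_disabled) {
        // A copying transition cannot run while moving GC is disabled.
        return HomogeneousSpaceCompactResult::kErrorReject;
      }
      // Elided: suspend threads, semi-space-copy live objects from *main to *backup.
      std::swap(*main, *backup);  // The backup mapping becomes the active main space.
      return HomogeneousSpaceCompactResult::kSuccess;
    }
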
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 58ba61b..7d3fd2d 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -41,11 +41,11 @@
                                                       const PreFenceVisitor& pre_fence_visitor) {
   if (kIsDebugBuild) {
     CheckPreconditionsForAllocObject(klass, byte_count);
+    // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
+    // done in the runnable state where suspension is expected.
+    CHECK_EQ(self->GetState(), kRunnable);
+    self->AssertThreadSuspensionIsAllowable();
   }
-  // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
-  // done in the runnable state where suspension is expected.
-  DCHECK_EQ(self->GetState(), kRunnable);
-  self->AssertThreadSuspensionIsAllowable();
   // Need to check that we aren't the large object allocator since the large object allocation
   // code path includes this function. If we didn't check we would have an infinite loop.
   if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
@@ -54,49 +54,72 @@
   }
   mirror::Object* obj;
   AllocationTimer alloc_timer(this, &obj);
-  size_t bytes_allocated, usable_size;
-  obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
-                                            &usable_size);
-  if (UNLIKELY(obj == nullptr)) {
-    bool is_current_allocator = allocator == GetCurrentAllocator();
-    obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size,
-                                 &klass);
-    if (obj == nullptr) {
-      bool after_is_current_allocator = allocator == GetCurrentAllocator();
-      if (is_current_allocator && !after_is_current_allocator) {
-        // If the allocator changed, we need to restart the allocation.
-        return AllocObject<kInstrumented>(self, klass, byte_count, pre_fence_visitor);
+  size_t bytes_allocated;
+  size_t usable_size;
+  size_t new_num_bytes_allocated = 0;
+  if (allocator == kAllocatorTypeTLAB) {
+    byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
+  }
+  // If we have a thread local allocation we don't need to update bytes allocated.
+  if (allocator == kAllocatorTypeTLAB && byte_count <= self->TlabSize()) {
+    obj = self->AllocTlab(byte_count);
+    DCHECK(obj != nullptr) << "AllocTlab can't fail";
+    obj->SetClass(klass);
+    if (kUseBakerOrBrooksReadBarrier) {
+      if (kUseBrooksReadBarrier) {
+        obj->SetReadBarrierPointer(obj);
       }
-      return nullptr;
+      obj->AssertReadBarrierPointer();
     }
-  }
-  DCHECK_GT(bytes_allocated, 0u);
-  DCHECK_GT(usable_size, 0u);
-  obj->SetClass(klass);
-  if (kUseBakerOrBrooksReadBarrier) {
-    if (kUseBrooksReadBarrier) {
-      obj->SetReadBarrierPointer(obj);
+    bytes_allocated = byte_count;
+    usable_size = bytes_allocated;
+    pre_fence_visitor(obj, usable_size);
+    QuasiAtomic::ThreadFenceForConstructor();
+  } else {
+    obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
+                                              &usable_size);
+    if (UNLIKELY(obj == nullptr)) {
+      bool is_current_allocator = allocator == GetCurrentAllocator();
+      obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size,
+                                   &klass);
+      if (obj == nullptr) {
+        bool after_is_current_allocator = allocator == GetCurrentAllocator();
+        if (is_current_allocator && !after_is_current_allocator) {
+          // If the allocator changed, we need to restart the allocation.
+          return AllocObject<kInstrumented>(self, klass, byte_count, pre_fence_visitor);
+        }
+        return nullptr;
+      }
     }
-    obj->AssertReadBarrierPointer();
+    DCHECK_GT(bytes_allocated, 0u);
+    DCHECK_GT(usable_size, 0u);
+    obj->SetClass(klass);
+    if (kUseBakerOrBrooksReadBarrier) {
+      if (kUseBrooksReadBarrier) {
+        obj->SetReadBarrierPointer(obj);
+      }
+      obj->AssertReadBarrierPointer();
+    }
+    if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
+      // (Note this if statement will be constant folded away for the
+      // fast-path quick entry points.) Because SetClass() has no write
+      // barrier, if a non-moving space allocation, we need a write
+      // barrier as the class pointer may point to the bump pointer
+      // space (where the class pointer is an "old-to-young" reference,
+      // though rare) under the GSS collector with the remembered set
+      // enabled. We don't need this for kAllocatorTypeRosAlloc/DlMalloc
+      // cases because we don't directly allocate into the main alloc
+      // space (besides promotions) under the SS/GSS collector.
+      WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
+    }
+    pre_fence_visitor(obj, usable_size);
+    new_num_bytes_allocated =
+        static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated))
+        + bytes_allocated;
   }
-  if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
-    // (Note this if statement will be constant folded away for the
-    // fast-path quick entry points.) Because SetClass() has no write
-    // barrier, if a non-moving space allocation, we need a write
-    // barrier as the class pointer may point to the bump pointer
-    // space (where the class pointer is an "old-to-young" reference,
-    // though rare) under the GSS collector with the remembered set
-    // enabled. We don't need this for kAllocatorTypeRosAlloc/DlMalloc
-    // cases because we don't directly allocate into the main alloc
-    // space (besides promotions) under the SS/GSS collector.
-    WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
-  }
-  pre_fence_visitor(obj, usable_size);
   if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
     CHECK_LE(obj->SizeOf(), usable_size);
   }
-  const size_t new_num_bytes_allocated =
-      static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) + bytes_allocated;
   // TODO: Deprecate.
   if (kInstrumented) {
     if (Runtime::Current()->HasStatsEnabled()) {
@@ -158,7 +181,8 @@
 inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
                                            size_t alloc_size, size_t* bytes_allocated,
                                            size_t* usable_size) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+  if (allocator_type != kAllocatorTypeTLAB &&
+      UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
   mirror::Object* ret;
@@ -206,18 +230,24 @@
       break;
     }
     case kAllocatorTypeTLAB: {
-      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
+      DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
       if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        // Try allocating a new thread local buffer, if the allocaiton fails the space must be
-        // full so return nullptr.
-        if (!bump_pointer_space_->AllocNewTlab(self, alloc_size + kDefaultTLABSize)) {
+        const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
           return nullptr;
         }
+        // Try allocating a new thread local buffer; if the allocation fails, the space must be
+        // full, so return nullptr.
+        if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+          return nullptr;
+        }
+        *bytes_allocated = new_tlab_size;
+      } else {
+        *bytes_allocated = 0;
       }
       // The allocation can't fail.
       ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
-      *bytes_allocated = alloc_size;
       *usable_size = alloc_size;
       break;
     }
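
The heap-inl.h changes above split a TLAB fast path out of the common allocation path:
sizes are rounded up front, a fitting request bumps a thread-local cursor without locks or
atomics, and the global byte counter is only charged when a new TLAB is carved out (which
is why the slow path reports new_tlab_size and the fast path reports 0). A rough,
single-threaded model, where Tlab and AllocWithTlab are hypothetical stand-ins for
Thread::AllocTlab()/TlabSize():

    #include <cstddef>
    #include <cstdint>

    struct Tlab {
      uint8_t* pos = nullptr;
      uint8_t* end = nullptr;
      size_t Size() const { return static_cast<size_t>(end - pos); }
    };

    // byte_count is assumed pre-rounded to the bump pointer space alignment.
    void* AllocWithTlab(Tlab* tlab, size_t byte_count, size_t* bytes_charged) {
      if (byte_count <= tlab->Size()) {
        // Fast path: bump the thread-local cursor; nothing is charged globally
        // because the whole buffer was charged when the TLAB was created.
        uint8_t* ret = tlab->pos;
        tlab->pos += byte_count;
        *bytes_charged = 0;
        return ret;
      }
      // Slow path (elided): reserve byte_count + kDefaultTLABSize as a new TLAB,
      // set *bytes_charged to that full amount, then bump-allocate from it.
      return nullptr;
    }
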
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 03baba0..4e38335 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -58,7 +58,6 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference-inl.h"
-#include "object_utils.h"
 #include "os.h"
 #include "reflection.h"
 #include "runtime.h"
@@ -93,6 +92,10 @@
 static constexpr size_t kAllocationStackReserveSize = 1024;
 // Default mark stack size in bytes.
 static const size_t kDefaultMarkStackSize = 64 * KB;
+// Define space name.
+static const char* kDlMallocSpaceName[2] = {"main dlmalloc space", "main dlmalloc space 1"};
+static const char* kRosAllocSpaceName[2] = {"main rosalloc space", "main rosalloc space 1"};
+static const char* kMemMapSpaceName[2] = {"main space", "main space 1"};
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
@@ -103,7 +106,8 @@
            bool ignore_max_footprint, bool use_tlab,
            bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
            bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-           bool verify_post_gc_rosalloc)
+           bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction_for_oom,
+           uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
       dlmalloc_space_(nullptr),
@@ -173,7 +177,11 @@
       verify_object_mode_(kVerifyObjectModeDisabled),
       disable_moving_gc_count_(0),
       running_on_valgrind_(Runtime::Current()->RunningOnValgrind()),
-      use_tlab_(use_tlab) {
+      use_tlab_(use_tlab),
+      main_space_backup_(nullptr),
+      min_interval_homogeneous_space_compaction_by_oom_(min_interval_homogeneous_space_compaction_by_oom),
+      last_time_homogeneous_space_compaction_by_oom_(NanoTime()),
+      use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
@@ -205,30 +213,90 @@
     CHECK_GT(oat_file_end_addr, image_space->End());
     requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
   }
+
+  /*
+  requested_alloc_space_begin ->     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+                                     +-  nonmoving space (kNonMovingSpaceCapacity) +-
+                                     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+                                     +-        main alloc space (capacity_)        +-
+                                     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+                                     +-       main alloc space 1 (capacity_)       +-
+                                     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+  */
+  bool create_backup_main_space =
+      background_collector_type == gc::kCollectorTypeHomogeneousSpaceCompact ||
+      use_homogeneous_space_compaction_for_oom;
   if (is_zygote) {
     // Reserve the address range before we create the non moving space to make sure bitmaps don't
     // take it.
     std::string error_str;
-    MemMap* mem_map = MemMap::MapAnonymous(
-        "main space", requested_alloc_space_begin + kNonMovingSpaceCapacity, capacity,
+    MemMap* main_space_map = MemMap::MapAnonymous(
+        kMemMapSpaceName[0], requested_alloc_space_begin + kNonMovingSpaceCapacity, capacity_,
         PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(mem_map != nullptr) << error_str;
+    CHECK(main_space_map != nullptr) << error_str;
+    MemMap* main_space_1_map = nullptr;
+    // Attempt to reserve an extra mem_map for homogeneous space compaction right after the
+    // main space map.
+    if (create_backup_main_space) {
+      main_space_1_map = MemMap::MapAnonymous(kMemMapSpaceName[1], main_space_map->End(), capacity_,
+                                               PROT_READ | PROT_WRITE, true, &error_str);
+      if (main_space_1_map == nullptr) {
+        LOG(WARNING) << "Failed to create map " <<  kMemMapSpaceName[1] << " with error "
+                     << error_str;
+      }
+    }
     // Non moving space is always dlmalloc since we currently don't have support for multiple
-    // rosalloc spaces.
+    // active rosalloc spaces.
     non_moving_space_ = space::DlMallocSpace::Create(
-        "zygote / non moving space", initial_size, kNonMovingSpaceCapacity, kNonMovingSpaceCapacity,
-        requested_alloc_space_begin, false);
+        "zygote / non moving space", initial_size, kNonMovingSpaceCapacity,
+        kNonMovingSpaceCapacity, requested_alloc_space_begin, false);
     non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
-    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
+    CreateMainMallocSpace(main_space_map, initial_size, growth_limit_, capacity_);
+    if (main_space_1_map != nullptr) {
+      const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+      main_space_backup_ = CreateMallocSpaceFromMemMap(main_space_1_map, initial_size,
+                                                       growth_limit_, capacity_, name, true);
+    }
   } else {
     std::string error_str;
-    MemMap* mem_map = MemMap::MapAnonymous("main/non-moving space", requested_alloc_space_begin,
-                                           capacity, PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(mem_map != nullptr) << error_str;
+    byte* request_begin = requested_alloc_space_begin;
+    if (request_begin == nullptr) {
+      // Disable homogeneous space compaction since we don't have an image.
+      create_backup_main_space = false;
+    }
+    MemMap* main_space_1_map = nullptr;
+    if (create_backup_main_space) {
+      request_begin += kNonMovingSpaceCapacity;
+      // Attempt to reserve an extra mem_map for homogeneous space compaction right after the
+      // main space map.
+      main_space_1_map = MemMap::MapAnonymous(kMemMapSpaceName[1], request_begin + capacity_,
+                                               capacity_, PROT_READ | PROT_WRITE, true, &error_str);
+      if (main_space_1_map == nullptr) {
+        LOG(WARNING) << "Failed to create map " <<  kMemMapSpaceName[1] << " with error "
+                     << error_str;
+        request_begin = requested_alloc_space_begin;
+      }
+    }
+    MemMap* main_space_map = MemMap::MapAnonymous(kMemMapSpaceName[0], request_begin, capacity_,
+                                                  PROT_READ | PROT_WRITE, true, &error_str);
+    CHECK(main_space_map != nullptr) << error_str;
+    // Introduce a separate non moving space.
+    if (main_space_1_map != nullptr) {
+      // Do this before creating the main malloc space to prevent bitmaps from being placed here.
+      non_moving_space_ = space::DlMallocSpace::Create(
+          "non moving space", kDefaultInitialSize, kNonMovingSpaceCapacity, kNonMovingSpaceCapacity,
+          requested_alloc_space_begin, false);
+      non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+    }
     // Create the main free list space, which doubles as the non moving space. We can do this since
     // non zygote means that we won't have any background compaction.
-    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
-    non_moving_space_ = main_space_;
+    CreateMainMallocSpace(main_space_map, initial_size, growth_limit_, capacity_);
+    if (main_space_1_map != nullptr) {
+      const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+      main_space_backup_ = CreateMallocSpaceFromMemMap(main_space_1_map, initial_size,
+                                                       growth_limit_, capacity_, name, true);
+      CHECK(main_space_backup_ != nullptr);
+    } else {
+      non_moving_space_ = main_space_;
+    }
   }
   CHECK(non_moving_space_ != nullptr);
 
@@ -240,7 +308,7 @@
       (IsMovingGc(foreground_collector_type_) || IsMovingGc(background_collector_type_))) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
     // Divide by 2 for a temporary fix for reducing virtual memory usage.
-    const size_t bump_pointer_space_capacity = capacity / 2;
+    const size_t bump_pointer_space_capacity = capacity_ / 2;
     bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
                                                           bump_pointer_space_capacity, nullptr);
     CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
@@ -253,13 +321,25 @@
   if (non_moving_space_ != main_space_) {
     AddSpace(non_moving_space_);
   }
+  if (main_space_backup_ != nullptr) {
+    AddSpace(main_space_backup_);
+  } else {
+    const char* disable_msg = "Disabling homogeneous space compaction due to no backup main space";
+    if (background_collector_type_ == gc::kCollectorTypeHomogeneousSpaceCompact) {
+      background_collector_type_ = collector_type_;
+      LOG(WARNING) << disable_msg;
+    } else if (use_homogeneous_space_compaction_for_oom_) {
+      LOG(WARNING) << disable_msg;
+    }
+    use_homogeneous_space_compaction_for_oom_ = false;
+  }
   if (main_space_ != nullptr) {
     AddSpace(main_space_);
   }
 
   // Allocate the large object space.
   if (kUseFreeListSpaceForLOS) {
-    large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity);
+    large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity_);
   } else {
     large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
   }
@@ -328,7 +408,7 @@
   }
   if (kMovingCollector) {
     // TODO: Clean this up.
-    bool generational = foreground_collector_type_ == kCollectorTypeGSS;
+    const bool generational = foreground_collector_type_ == kCollectorTypeGSS;
     semi_space_collector_ = new collector::SemiSpace(this, generational,
                                                      generational ? "generational" : "");
     garbage_collectors_.push_back(semi_space_collector_);
@@ -339,9 +419,8 @@
   }
 
   if (GetImageSpace() != nullptr && main_space_ != nullptr) {
-    // Check that there's no gap between the image space and the main
-    // space so that the immune region won't break (eg. due to a large
-    // object allocated in the gap).
+    // Check that there's no gap between the image space and the main space so that the immune
+    // region won't break (e.g. due to a large object allocated in the gap).
     bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(), main_space_->GetMemMap());
     if (!no_gap) {
       MemMap::DumpMaps(LOG(ERROR));
@@ -358,11 +437,36 @@
   }
 }
 
+space::MallocSpace* Heap::CreateMallocSpaceFromMemMap(MemMap* mem_map, size_t initial_size,
+                                                      size_t growth_limit, size_t capacity,
+                                                      const char* name, bool can_move_objects) {
+  space::MallocSpace* malloc_space = nullptr;
+  if (kUseRosAlloc) {
+    // Create rosalloc space.
+    malloc_space = space::RosAllocSpace::CreateFromMemMap(mem_map, name, kDefaultStartingSize,
+                                                          initial_size, growth_limit, capacity,
+                                                          low_memory_mode_, can_move_objects);
+  } else {
+    malloc_space = space::DlMallocSpace::CreateFromMemMap(mem_map, name, kDefaultStartingSize,
+                                                          initial_size, growth_limit, capacity,
+                                                          can_move_objects);
+  }
+  if (collector::SemiSpace::kUseRememberedSet) {
+    accounting::RememberedSet* rem_set  =
+        new accounting::RememberedSet(std::string(name) + " remembered set", this, malloc_space);
+    CHECK(rem_set != nullptr) << "Failed to create main space remembered set";
+    AddRememberedSet(rem_set);
+  }
+  CHECK(malloc_space != nullptr) << "Failed to create " << name;
+  malloc_space->SetFootprintLimit(malloc_space->Capacity());
+  return malloc_space;
+}
+
 void Heap::CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
                                  size_t capacity) {
   // Is background compaction enabled?
   bool can_move_objects = IsMovingGc(background_collector_type_) !=
-      IsMovingGc(foreground_collector_type_);
+      IsMovingGc(foreground_collector_type_) || use_homogeneous_space_compaction_for_oom_;
   // If we are the zygote and don't yet have a zygote space, it means that the zygote fork will
   // happen in the future. If this happens and we have kCompactZygote enabled we wish to compact
   // from the main space to the zygote space. If background compaction is enabled, always pass in
@@ -375,26 +479,10 @@
   if (collector::SemiSpace::kUseRememberedSet && main_space_ != nullptr) {
     RemoveRememberedSet(main_space_);
   }
-  if (kUseRosAlloc) {
-    rosalloc_space_ = space::RosAllocSpace::CreateFromMemMap(
-        mem_map, "main rosalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
-        low_memory_mode_, can_move_objects);
-    main_space_ = rosalloc_space_;
-    CHECK(main_space_ != nullptr) << "Failed to create rosalloc space";
-  } else {
-    dlmalloc_space_ = space::DlMallocSpace::CreateFromMemMap(
-        mem_map, "main dlmalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
-        can_move_objects);
-    main_space_ = dlmalloc_space_;
-    CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
-  }
-  main_space_->SetFootprintLimit(main_space_->Capacity());
-  if (collector::SemiSpace::kUseRememberedSet) {
-    accounting::RememberedSet* main_space_rem_set =
-        new accounting::RememberedSet("Main space remembered set", this, main_space_);
-    CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
-    AddRememberedSet(main_space_rem_set);
-  }
+  const char* name = kUseRosAlloc ? kRosAllocSpaceName[0] : kDlMallocSpaceName[0];
+  main_space_ = CreateMallocSpaceFromMemMap(mem_map, initial_size, growth_limit, capacity, name,
+                                            can_move_objects);
+  SetSpaceAsDefault(main_space_);
   VLOG(heap) << "Created main space " << main_space_;
 }
 
@@ -547,8 +635,11 @@
       RequestCollectorTransition(foreground_collector_type_, 0);
     } else {
       // Don't delay for debug builds since we may want to stress test the GC.
-      RequestCollectorTransition(background_collector_type_, kIsDebugBuild ? 0 :
-          kCollectorTransitionWait);
+      // If background_collector_type_ is kCollectorTypeHomogeneousSpaceCompact then we have
+      // special handling which does a homogeneous space compaction once but then doesn't
+      // transition the collector.
+      RequestCollectorTransition(background_collector_type_,
+                                 kIsDebugBuild ? 0 : kCollectorTransitionWait);
     }
   }
 }
@@ -605,7 +696,7 @@
 }
 
 void Heap::AddSpace(space::Space* space) {
-  DCHECK(space != nullptr);
+  CHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
     DCHECK(!space->IsDiscontinuousSpace());
@@ -801,37 +892,23 @@
   return NULL;
 }
 
-static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) {
-  size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start);
-  if (used_bytes < chunk_size) {
-    size_t chunk_free_bytes = chunk_size - used_bytes;
-    size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg);
-    max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes);
-  }
-}
-
-void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
+void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
       << " free bytes";
   // If the allocation failed due to fragmentation, print out the largest continuous allocation.
-  if (!large_object_allocation && total_bytes_free >= byte_count) {
-    size_t max_contiguous_allocation = 0;
-    for (const auto& space : continuous_spaces_) {
-      if (space->IsMallocSpace()) {
-        // To allow the Walk/InspectAll() to exclusively-lock the mutator
-        // lock, temporarily release the shared access to the mutator
-        // lock here by transitioning to the suspended state.
-        Locks::mutator_lock_->AssertSharedHeld(self);
-        self->TransitionFromRunnableToSuspended(kSuspended);
-        space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
-        self->TransitionFromSuspendedToRunnable();
-        Locks::mutator_lock_->AssertSharedHeld(self);
-      }
+  if (total_bytes_free >= byte_count) {
+    space::MallocSpace* space = nullptr;
+    if (allocator_type == kAllocatorTypeNonMoving) {
+      space = non_moving_space_;
+    } else if (allocator_type == kAllocatorTypeRosAlloc ||
+               allocator_type == kAllocatorTypeDlMalloc) {
+      space = main_space_;
     }
-    oss << "; failed due to fragmentation (largest possible contiguous allocation "
-        <<  max_contiguous_allocation << " bytes)";
+    if (space != nullptr) {
+      space->LogFragmentationAllocFailure(oss, byte_count);
+    }
   }
   self->ThrowOutOfMemoryError(oss.str().c_str());
 }
@@ -854,6 +931,15 @@
     ScopedThreadStateChange tsc(self, kSleeping);
     usleep(wait_time / 1000);  // Usleep takes microseconds.
   }
+  // Launch homogeneous space compaction if it is desired.
+  if (desired_collector_type == kCollectorTypeHomogeneousSpaceCompact) {
+    if (!CareAboutPauseTimes()) {
+      PerformHomogeneousSpaceCompact();
+    }
+    // No need to Trim(). Homogeneous space compaction may free more virtual and physical memory.
+    desired_collector_type = collector_type_;
+    return;
+  }
   // Transition the collector if the desired collector type is not the same as the current
   // collector type.
   TransitionCollector(desired_collector_type);
@@ -1035,7 +1121,13 @@
   return false;
 }
 
-void Heap::DumpSpaces(std::ostream& stream) {
+std::string Heap::DumpSpaces() const {
+  std::ostringstream oss;
+  DumpSpaces(oss);
+  return oss.str();
+}
+
+void Heap::DumpSpaces(std::ostream& stream) const {
   for (const auto& space : continuous_spaces_) {
     accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
@@ -1069,10 +1161,7 @@
 
   if (verify_object_mode_ > kVerifyObjectModeFast) {
     // Note: the bitmap tests below are racy since we don't hold the heap bitmap lock.
-    if (!IsLiveObjectLocked(obj)) {
-      DumpSpaces();
-      LOG(FATAL) << "Object is dead: " << obj;
-    }
+    CHECK(IsLiveObjectLocked(obj)) << "Object is dead " << obj << "\n" << DumpSpaces();
   }
 }
 
@@ -1103,6 +1192,17 @@
   }
 }
 
+space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->AsContinuousSpace()->IsRosAllocSpace()) {
+      if (space->AsContinuousSpace()->AsRosAllocSpace()->GetRosAlloc() == rosalloc) {
+        return space->AsContinuousSpace()->AsRosAllocSpace();
+      }
+    }
+  }
+  return nullptr;
+}
+
 mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
                                              size_t alloc_size, size_t* bytes_allocated,
                                              size_t* usable_size,
@@ -1184,8 +1284,46 @@
     return nullptr;
   }
   ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
+  if (ptr == nullptr && use_homogeneous_space_compaction_for_oom_) {
+    const uint64_t current_time = NanoTime();
+    if ((allocator == kAllocatorTypeRosAlloc || allocator == kAllocatorTypeDlMalloc) &&
+        current_time - last_time_homogeneous_space_compaction_by_oom_ >
+        min_interval_homogeneous_space_compaction_by_oom_) {
+      last_time_homogeneous_space_compaction_by_oom_ = current_time;
+      HomogeneousSpaceCompactResult result = PerformHomogeneousSpaceCompact();
+      switch (result) {
+        case HomogeneousSpaceCompactResult::kSuccess:
+          // If the allocation succeeded, we delayed an oom.
+          ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
+          if (ptr != nullptr) {
+            count_delayed_oom_++;
+          }
+          break;
+        case HomogeneousSpaceCompactResult::kErrorReject:
+          // Reject due to disabled moving GC.
+          break;
+        case HomogeneousSpaceCompactResult::kErrorVMShuttingDown:
+          // Throw OOM by default.
+          break;
+        default: {
+          LOG(FATAL) << "Unimplemented homogeneous space compaction result " << static_cast<size_t>(result);
+        }
+      }
+      // Log that we ran homogeneous space compaction since this can cause jank.
+      VLOG(heap) << "Ran heap homogeneous space compaction, "
+                << " requested defragmentation "
+                << count_requested_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                << " performed defragmentation "
+                << count_performed_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                << " ignored homogeneous space compaction "
+                << count_ignored_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                << " delayed count = "
+                << count_delayed_oom_.LoadSequentiallyConsistent();
+    }
+  }
+  // If the allocation hasn't succeeded by this point, throw an OOM error.
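+  // Passing the allocator type (rather than a large-object flag) lets ThrowOutOfMemoryError
+  // identify the failing space and report fragmentation details (see LogFragmentationAllocFailure).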
   if (ptr == nullptr) {
-    ThrowOutOfMemoryError(self, alloc_size, allocator == kAllocatorTypeLOS);
+    ThrowOutOfMemoryError(self, alloc_size, allocator);
   }
   return ptr;
 }
@@ -1342,6 +1480,66 @@
   CollectGarbageInternal(gc_plan_.back(), kGcCauseExplicit, clear_soft_references);
 }
 
+HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() {
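+  // Compact the main malloc space into its backup space with all mutators suspended, then swap
+  // the two spaces. This defragments the heap without changing the collector type.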
+  Thread* self = Thread::Current();
+  // Increment the requested homogeneous space compaction count; the performed count is only
+  // incremented below, once the compaction actually runs.
+  count_requested_homogeneous_space_compaction_++;
+  ThreadList* tl = Runtime::Current()->GetThreadList();
+  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
+  Locks::mutator_lock_->AssertNotHeld(self);
+  {
+    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(kGcCauseHomogeneousSpaceCompact, self);
+    // Homogeneous space compaction is a copying transition, so we can't run it if the moving GC
+    // disable count is non-zero. Also exit if the collector type changed to something that
+    // doesn't benefit from homogeneous space compaction.
+    if (disable_moving_gc_count_ != 0 || IsMovingGc(collector_type_)) {
+      return HomogeneousSpaceCompactResult::kErrorReject;
+    }
+    collector_type_running_ = kCollectorTypeHomogeneousSpaceCompact;
+  }
+  if (Runtime::Current()->IsShuttingDown(self)) {
+    // Don't allow heap transitions to happen if the runtime is shutting down since these can
+    // cause objects to get finalized.
+    FinishGC(self, collector::kGcTypeNone);
+    return HomogeneousSpaceCompactResult::kErrorVMShuttingDown;
+  }
+  // Suspend all threads.
+  tl->SuspendAll();
+  uint64_t start_time = NanoTime();
+  // Launch compaction.
+  space::MallocSpace* to_space = main_space_backup_;
+  space::MallocSpace* from_space = main_space_;
+  to_space->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+  const uint64_t space_size_before_compaction = from_space->Size();
+  Compact(to_space, from_space, kGcCauseHomogeneousSpaceCompact);
+  // Leave the from-space protected as PROT_READ so that we can still run RosAlloc verification on it.
+  from_space->GetMemMap()->Protect(PROT_READ);
+  const uint64_t space_size_after_compaction = to_space->Size();
+  std::swap(main_space_, main_space_backup_);
+  SetSpaceAsDefault(main_space_);  // Set as default to reset the proper dlmalloc space.
+  // Update performed homogeneous space compaction count.
+  count_performed_homogeneous_space_compaction_++;
+  // Log statistics and resume all threads.
+  uint64_t duration = NanoTime() - start_time;
+  LOG(INFO) << "Heap homogeneous space compaction took " << PrettyDuration(duration) << " size: "
+            << PrettySize(space_size_before_compaction) << " -> "
+            << PrettySize(space_size_after_compaction) << " compact-ratio: "
+            << std::fixed << static_cast<double>(space_size_after_compaction) /
+            static_cast<double>(space_size_before_compaction);
+  tl->ResumeAll();
+  // Finish GC.
+  reference_processor_.EnqueueClearedReferences(self);
+  GrowForUtilization(semi_space_collector_);
+  FinishGC(self, collector::kGcTypeFull);
+  return HomogeneousSpaceCompactResult::kSuccess;
+}
+
 void Heap::TransitionCollector(CollectorType collector_type) {
   if (collector_type == collector_type_) {
     return;
@@ -1396,7 +1594,7 @@
         // We are transitioning from non moving GC -> moving GC, since we copied from the bump
         // pointer space last transition it will be protected.
         bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-        Compact(bump_pointer_space_, main_space_);
+        Compact(bump_pointer_space_, main_space_, kGcCauseCollectorTransition);
         // Remove the main space so that we don't try to trim it; this doesn't work for debug
         // builds since RosAlloc attempts to read the magic number from a protected page.
         RemoveSpace(main_space_);
@@ -1410,7 +1608,7 @@
         // Compact to the main space from the bump pointer space, don't need to swap semispaces.
         AddSpace(main_space_);
         main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-        Compact(main_space_, bump_pointer_space_);
+        Compact(main_space_, bump_pointer_space_, kGcCauseCollectorTransition);
       }
       break;
     }
@@ -1736,14 +1934,15 @@
 }
 
 void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
-                   space::ContinuousMemMapAllocSpace* source_space) {
+                   space::ContinuousMemMapAllocSpace* source_space,
+                   GcCause gc_cause) {
   CHECK(kMovingCollector);
   if (target_space != source_space) {
     // Don't swap spaces since this isn't a typical semi space collection.
     semi_space_collector_->SetSwapSemiSpaces(false);
     semi_space_collector_->SetFromSpace(source_space);
     semi_space_collector_->SetToSpace(target_space);
-    semi_space_collector_->Run(kGcCauseCollectorTransition, false);
+    semi_space_collector_->Run(gc_cause, false);
   } else {
     CHECK(target_space->IsBumpPointerSpace())
         << "In-place compaction is only supported for bump pointer spaces";
@@ -2154,7 +2353,7 @@
       accounting::RememberedSet* remembered_set = table_pair.second;
       remembered_set->Dump(LOG(ERROR) << remembered_set->GetName() << ": ");
     }
-    DumpSpaces();
+    DumpSpaces(LOG(ERROR));
   }
   return visitor.GetFailureCount();
 }
@@ -2271,12 +2470,7 @@
       visitor(*it);
     }
   }
-
-  if (visitor.Failed()) {
-    DumpSpaces();
-    return false;
-  }
-  return true;
+  return !visitor.Failed();
 }
 
 void Heap::SwapStacks(Thread* self) {
@@ -2374,9 +2568,8 @@
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     SwapStacks(self);
     // Sort the live stack so that we can quickly binary search it later.
-    if (!VerifyMissingCardMarks()) {
-      LOG(FATAL) << "Pre " << gc->GetName() << " missing card mark verification failed";
-    }
+    CHECK(VerifyMissingCardMarks()) << "Pre " << gc->GetName()
+                                    << " missing card mark verification failed\n" << DumpSpaces();
     SwapStacks(self);
   }
   if (verify_mod_union_table_) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c9ea03e..0da113f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -72,6 +72,10 @@
   class SemiSpace;
 }  // namespace collector
 
+namespace allocator {
+  class RosAlloc;
+}  // namespace allocator
+
 namespace space {
   class AllocSpace;
   class BumpPointerSpace;
@@ -97,6 +101,15 @@
   }
 };
 
+enum HomogeneousSpaceCompactResult {
+  // Success.
+  kSuccess,
+  // Reject due to disabled moving GC.
+  kErrorReject,
+  // System is shutting down.
+  kErrorVMShuttingDown,
+};
+
 // If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace
 static constexpr bool kUseRosAlloc = true;
 
@@ -120,7 +133,7 @@
 
   static constexpr size_t kDefaultStartingSize = kPageSize;
   static constexpr size_t kDefaultInitialSize = 2 * MB;
-  static constexpr size_t kDefaultMaximumSize = 32 * MB;
+  static constexpr size_t kDefaultMaximumSize = 256 * MB;
   static constexpr size_t kDefaultMaxFree = 2 * MB;
   static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4;
   static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
@@ -151,7 +164,8 @@
                 bool ignore_max_footprint, bool use_tlab,
                 bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
                 bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-                bool verify_post_gc_rosalloc);
+                bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction,
+                uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
   ~Heap();
 
@@ -194,7 +208,6 @@
 
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
 
   void RegisterNativeAllocation(JNIEnv* env, int bytes);
   void RegisterNativeFree(JNIEnv* env, int bytes);
@@ -500,6 +513,9 @@
     return rosalloc_space_;
   }
 
+  // Return the RosAllocSpace that uses the given rosalloc allocator, or null if there is none.
+  space::RosAllocSpace* GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const;
+
   space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
   }
@@ -523,7 +539,8 @@
     }
   }
 
-  void DumpSpaces(std::ostream& stream = LOG(INFO));
+  std::string DumpSpaces() const WARN_UNUSED;
+  void DumpSpaces(std::ostream& stream) const;
 
   // Dump object should only be used by the signal handler.
   void DumpObject(std::ostream& stream, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -569,12 +586,19 @@
   }
 
  private:
+  // Compact source space to target space.
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
-               space::ContinuousMemMapAllocSpace* source_space)
+               space::ContinuousMemMapAllocSpace* source_space,
+               GcCause gc_cause)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
+  // Returns true if we can do hspace compaction.
+  bool SupportHSpaceCompaction() const {
+    return main_space_backup_ != nullptr;
+  }
+
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
     return
         allocator_type != kAllocatorTypeBumpPointer &&
@@ -585,7 +609,8 @@
   }
   static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
-        collector_type == kCollectorTypeCC || collector_type == kCollectorTypeMC;
+        collector_type == kCollectorTypeCC || collector_type == kCollectorTypeMC ||
+        collector_type == kCollectorTypeHomogeneousSpaceCompact;
   }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -628,7 +653,7 @@
                                               size_t* usable_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
+  void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kGrow>
@@ -683,10 +708,18 @@
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
-  // Create the main free list space, typically either a RosAlloc space or DlMalloc space.
+  // Create a new alloc space and compact the default alloc space into it.
+  HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact();
+
+  // Create the main free list malloc space, either a RosAlloc space or DlMalloc space.
   void CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
                              size_t capacity);
 
+  // Create a malloc space based on a mem map. Does not set the space as default.
+  space::MallocSpace* CreateMallocSpaceFromMemMap(MemMap* mem_map, size_t initial_size,
+                                                  size_t growth_limit, size_t capacity,
+                                                  const char* name, bool can_move_objects);
+
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
@@ -973,6 +1006,30 @@
   const bool running_on_valgrind_;
   const bool use_tlab_;
 
+  // Pointer to the space which becomes the new main space when we do homogeneous space compaction.
+  space::MallocSpace* main_space_backup_;
+
+  // Minimum interval allowed between two homogeneous space compactions triggered by OOM.
+  uint64_t min_interval_homogeneous_space_compaction_by_oom_;
+
+  // Time of the last homogeneous space compaction triggered by OOM.
+  uint64_t last_time_homogeneous_space_compaction_by_oom_;
+
+  // Number of OOMs delayed (avoided so far) by homogeneous space compaction.
+  Atomic<size_t> count_delayed_oom_;
+
+  // Count for requested homogeneous space compaction.
+  Atomic<size_t> count_requested_homogeneous_space_compaction_;
+
+  // Count for ignored homogeneous space compaction.
+  Atomic<size_t> count_ignored_homogeneous_space_compaction_;
+
+  // Count for performed homogeneous space compaction.
+  Atomic<size_t> count_performed_homogeneous_space_compaction_;
+
+  // Whether or not we use homogeneous space compaction to avoid OOM errors.
+  bool use_homogeneous_space_compaction_for_oom_;
+
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
   friend class collector::MarkSweep;
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 4176f4a..e6b5c75 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -17,10 +17,11 @@
 #include "common_runtime_test.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/reference_processor-inl.h b/runtime/gc/reference_processor-inl.h
new file mode 100644
index 0000000..f619a15
--- /dev/null
+++ b/runtime/gc/reference_processor-inl.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_REFERENCE_PROCESSOR_INL_H_
+#define ART_RUNTIME_GC_REFERENCE_PROCESSOR_INL_H_
+
+#include "reference_processor.h"
+
+namespace art {
+namespace gc {
+
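+// Note: the slow path flag now lives in the java.lang.ref.Reference class object rather than in
+// the ReferenceProcessor, so it can be read from the Reference class alone.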
+inline bool ReferenceProcessor::SlowPathEnabled() {
+  return mirror::Reference::GetJavaLangRefReference()->GetSlowPathEnabled();
+}
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_REFERENCE_PROCESSOR_INL_H_
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index e52bc1f..d3641d1 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -17,7 +17,9 @@
 #include "reference_processor.h"
 
 #include "mirror/object-inl.h"
+#include "mirror/reference.h"
 #include "mirror/reference-inl.h"
+#include "reference_processor-inl.h"
 #include "reflection.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
@@ -27,18 +29,17 @@
 namespace gc {
 
 ReferenceProcessor::ReferenceProcessor()
-    : process_references_args_(nullptr, nullptr, nullptr), slow_path_enabled_(false),
+    : process_references_args_(nullptr, nullptr, nullptr),
       preserving_references_(false), lock_("reference processor lock", kReferenceProcessorLock),
       condition_("reference processor condition", lock_) {
 }
 
 void ReferenceProcessor::EnableSlowPath() {
-  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  slow_path_enabled_ = true;
+  mirror::Reference::GetJavaLangRefReference()->SetSlowPath(true);
 }
 
 void ReferenceProcessor::DisableSlowPath(Thread* self) {
-  slow_path_enabled_ = false;
+  mirror::Reference::GetJavaLangRefReference()->SetSlowPath(false);
   condition_.Broadcast(self);
 }
 
@@ -46,11 +47,11 @@
   mirror::Object* const referent = reference->GetReferent();
   // If the referent is null then it is already cleared, we can just return null since there is no
   // scenario where it becomes non-null during the reference processing phase.
-  if (LIKELY(!slow_path_enabled_) || referent == nullptr) {
+  if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) {
     return referent;
   }
   MutexLock mu(self, lock_);
-  while (slow_path_enabled_) {
+  while (SlowPathEnabled()) {
     mirror::HeapReference<mirror::Object>* const referent_addr =
         reference->GetReferentReferenceAddr();
     // If the referent became cleared, return it. Don't need barrier since thread roots can't get
@@ -117,7 +118,7 @@
     process_references_args_.is_marked_callback_ = is_marked_callback;
     process_references_args_.mark_callback_ = mark_object_callback;
     process_references_args_.arg_ = arg;
-    CHECK_EQ(slow_path_enabled_, concurrent) << "Slow path must be enabled iff concurrent";
+    CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent";
   }
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
@@ -182,7 +183,7 @@
                                                 void* arg) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
   DCHECK(klass != nullptr);
-  DCHECK(klass->IsReferenceClass());
+  DCHECK(klass->IsTypeOfReferenceClass());
   mirror::HeapReference<mirror::Object>* referent = ref->GetReferentReferenceAddr();
   if (referent->AsMirrorPtr() != nullptr && !is_marked_callback(referent, arg)) {
     Thread* self = Thread::Current();
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 2771ea8..7274457 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -49,6 +49,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       LOCKS_EXCLUDED(lock_);
+  // The slow path bool is contained in the java.lang.ref.Reference class object.
   // Only allow setting this with mutators suspended so that we can avoid using a lock in the
   // GetReferent fast path as an optimization.
   void EnableSlowPath() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -60,7 +61,7 @@
                               IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UpdateRoots(IsMarkedCallback* callback, void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
  private:
   class ProcessReferencesArgs {
@@ -75,8 +76,10 @@
     MarkObjectCallback* mark_callback_;
     void* arg_;
   };
+  bool SlowPathEnabled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Called by ProcessReferences.
-  void DisableSlowPath(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void DisableSlowPath(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // If we are preserving references it means that some dead objects may become live, we use start
   // and stop preserving to block mutators using GetReferent from getting access to these
   // referents.
@@ -84,8 +87,6 @@
   void StopPreservingReferences(Thread* self) LOCKS_EXCLUDED(lock_);
   // Process args, used by GetReferent to return referents which are already marked.
   ProcessReferencesArgs process_references_args_ GUARDED_BY(lock_);
-  // Boolean for whether or not we need to go slow path in GetReferent.
-  volatile bool slow_path_enabled_;
   // Boolean for whether or not we are preserving references (either soft references or finalizers).
   // If this is true, then we cannot return a referent (see comment in GetReferent).
   bool preserving_references_ GUARDED_BY(lock_);
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 5123e47..456d1b3 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -304,6 +304,30 @@
 }
 #endif
 
+static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) {
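+  // Heap-walk callback: for each chunk, record into *arg (a size_t) the largest amount of free
+  // bytes seen in any single chunk.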
+  size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start);
+  if (used_bytes < chunk_size) {
+    size_t chunk_free_bytes = chunk_size - used_bytes;
+    size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg);
+    max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes);
+  }
+}
+
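+// Reports why an allocation of failed_alloc_bytes failed: walks the mspace to find the largest
+// contiguous free run, which distinguishes fragmentation from genuine exhaustion.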
+void DlMallocSpace::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
+  Thread* self = Thread::Current();
+  size_t max_contiguous_allocation = 0;
+  // To allow Walk/InspectAll() to exclusively lock the mutator lock, temporarily release our
+  // shared hold on it by transitioning to the suspended state.
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  self->TransitionFromRunnableToSuspended(kSuspended);
+  Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+  self->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  os << "; failed due to fragmentation (largest possible contiguous allocation "
+     <<  max_contiguous_allocation << " bytes)";
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index accd26b..7aff14b 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -124,6 +124,9 @@
     return this;
   }
 
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
                 byte* limit, size_t growth_limit, bool can_move_objects, size_t starting_size,
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
index 129eace..02fc4a5 100644
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ b/runtime/gc/space/dlmalloc_space_base_test.cc
@@ -15,7 +15,9 @@
  */
 
 #include "space_test.h"
+
 #include "dlmalloc_space.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/dlmalloc_space_random_test.cc b/runtime/gc/space/dlmalloc_space_random_test.cc
index c4f8bae..4b1a1b1 100644
--- a/runtime/gc/space/dlmalloc_space_random_test.cc
+++ b/runtime/gc/space/dlmalloc_space_random_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "space_test.h"
+
 #include "dlmalloc_space.h"
 
 namespace art {
diff --git a/runtime/gc/space/dlmalloc_space_static_test.cc b/runtime/gc/space/dlmalloc_space_static_test.cc
index edaa198..d17d0a7 100644
--- a/runtime/gc/space/dlmalloc_space_static_test.cc
+++ b/runtime/gc/space/dlmalloc_space_static_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "space_test.h"
+
 #include "dlmalloc_space.h"
 
 namespace art {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index d534bcb..a87aa89 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -341,6 +341,10 @@
   return true;
 }
 
+const OatFile* ImageSpace::GetOatFile() const {
+  return oat_file_.get();
+}
+
 OatFile* ImageSpace::ReleaseOatFile() {
   CHECK(oat_file_.get() != NULL);
   return oat_file_.release();
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 372db3a..dd9b580 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -51,6 +51,9 @@
   static ImageHeader* ReadImageHeaderOrDie(const char* image_location,
                                            InstructionSet image_isa);
 
+  // Give access to the OatFile.
+  const OatFile* GetOatFile() const;
+
   // Releases the OatFile from the ImageSpace so it can be transferred to
   // the caller, presumably the ClassLinker.
   OatFile* ReleaseOatFile()
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index d24016c..6f49fbf 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -19,6 +19,7 @@
 
 #include "space.h"
 
+#include <iostream>
 #include <valgrind.h>
 #include <memcheck/memcheck.h>
 
@@ -132,6 +133,8 @@
     return can_move_objects_;
   }
 
+  virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0;
+
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
               byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects,
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 5738d47..92c6f53 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -227,7 +227,7 @@
 // Callback from rosalloc when it needs to increase the footprint
 extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace();
+  RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace(rosalloc);
   DCHECK(rosalloc_space != nullptr);
   DCHECK_EQ(rosalloc_space->GetRosAlloc(), rosalloc);
   return rosalloc_space->MoreCore(increment);
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 2934af8..f505305 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -120,6 +120,10 @@
 
   virtual ~RosAllocSpace();
 
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE {
+    rosalloc_->LogFragmentationAllocFailure(os, failed_alloc_bytes);
+  }
+
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
                 byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects,
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index fff4df1..71c8eb5 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -407,11 +407,11 @@
   // Clear the space back to an empty space.
   virtual void Clear() = 0;
 
-  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return live_bitmap_.get();
   }
 
-  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
     return mark_bitmap_.get();
   }
 
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index a2d4942..0291155 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -17,8 +17,6 @@
 #ifndef ART_RUNTIME_GC_SPACE_SPACE_TEST_H_
 #define ART_RUNTIME_GC_SPACE_SPACE_TEST_H_
 
-#include "zygote_space.h"
-
 #include <stdint.h>
 #include <memory>
 
@@ -26,6 +24,8 @@
 #include "globals.h"
 #include "mirror/array-inl.h"
 #include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
+#include "zygote_space.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/globals.h b/runtime/globals.h
index 3a906f1..1d9f22c 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -105,6 +105,19 @@
 // If true, references within the heap are poisoned (negated).
 static constexpr bool kPoisonHeapReferences = false;
 
+// Kinds of tracing clocks.
+enum TraceClockSource {
+  kTraceClockSourceThreadCpu,
+  kTraceClockSourceWall,
+  kTraceClockSourceDual,  // Both wall and thread CPU clocks.
+};
+
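+// Dual-clock tracing needs per-thread CPU time, which requires POSIX clocks; without them, only
+// wall-clock time can be sampled.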
+#if defined(HAVE_POSIX_CLOCKS)
+static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceDual;
+#else
+static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceWall;
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/handle.h b/runtime/handle.h
index 7e13601..f70faf4 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -28,29 +28,40 @@
 
 template<class T> class Handle;
 
+// Handles are memory locations that contain GC roots. Because the mirror::Object*s within a
+// handle are visible to the GC, the GC may move the references they contain, something that
+// couldn't be done with a raw pointer. Handles are generally allocated within HandleScopes.
+// ConstHandle is a super-class of Handle and doesn't support assignment operations.
 template<class T>
 class ConstHandle {
  public:
   ConstHandle() : reference_(nullptr) {
   }
-  ConstHandle(const ConstHandle<T>& handle) ALWAYS_INLINE : reference_(handle.reference_) {
+
+  ALWAYS_INLINE ConstHandle(const ConstHandle<T>& handle) : reference_(handle.reference_) {
   }
-  ConstHandle<T>& operator=(const ConstHandle<T>& handle) ALWAYS_INLINE {
+
+  ALWAYS_INLINE ConstHandle<T>& operator=(const ConstHandle<T>& handle) {
     reference_ = handle.reference_;
     return *this;
   }
-  explicit ConstHandle(StackReference<T>* reference) ALWAYS_INLINE : reference_(reference) {
+
+  ALWAYS_INLINE explicit ConstHandle(StackReference<T>* reference) : reference_(reference) {
   }
-  T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+
+  ALWAYS_INLINE T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *Get();
   }
-  T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+
+  ALWAYS_INLINE T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Get();
   }
-  T* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+
+  ALWAYS_INLINE T* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return reference_->AsMirrorPtr();
   }
-  jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+
+  ALWAYS_INLINE jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
       // Special case so that we work with NullHandles.
       return nullptr;
@@ -73,8 +84,8 @@
   StackReference<T>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
     return reference_;
   }
-  const StackReference<T>* GetReference() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      ALWAYS_INLINE {
+  ALWAYS_INLINE const StackReference<T>* GetReference() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return reference_;
   }
 
@@ -86,47 +97,54 @@
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
+// Handles that support assignment.
 template<class T>
 class Handle : public ConstHandle<T> {
  public:
   Handle() {
   }
-  Handle(const Handle<T>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE
+
+  ALWAYS_INLINE Handle(const Handle<T>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : ConstHandle<T>(handle.reference_) {
   }
-  Handle<T>& operator=(const Handle<T>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      ALWAYS_INLINE {
+
+  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ConstHandle<T>::operator=(handle);
     return *this;
   }
-  explicit Handle(StackReference<T>* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      ALWAYS_INLINE : ConstHandle<T>(reference) {
+
+  ALWAYS_INLINE explicit Handle(StackReference<T>* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ConstHandle<T>(reference) {
   }
-  T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+
+  ALWAYS_INLINE T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     StackReference<T>* ref = ConstHandle<T>::GetReference();
     T* const old = ref->AsMirrorPtr();
     ref->Assign(reference);
     return old;
   }
 
+  template<typename S>
+  explicit Handle(const Handle<S>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ConstHandle<T>(handle) {
+  }
+
  protected:
   template<typename S>
   explicit Handle(StackReference<S>* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : ConstHandle<T>(reference) {
   }
-  template<typename S>
-  explicit Handle(const Handle<S>& handle) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : ConstHandle<T>(handle) {
-  }
 
  private:
   friend class BuildGenericJniFrameVisitor;
-  template<class S> friend class Handle;
   friend class HandleScope;
   template<class S> friend class HandleWrapper;
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
+// A special case of Handle that only holds references to null.
 template<class T>
 class NullHandle : public Handle<T> {
  public:
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 62c7614..7bc811d 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_HANDLE_SCOPE_INL_H_
 #define ART_RUNTIME_HANDLE_SCOPE_INL_H_
 
-#include "handle_scope-inl.h"
+#include "handle_scope.h"
 
 #include "handle.h"
 #include "thread.h"
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index 629e4ec..42ef779 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -27,10 +27,12 @@
 namespace mirror {
 class Object;
 }
+
 class Thread;
 
-// HandleScopes can be allocated within the bridge frame between managed and native code backed by
-// stack storage or manually allocated in native.
+// HandleScopes are scoped objects containing a number of Handles. They are used to allocate
+// handles so that the handles (and the objects they contain) are visible to the GC as roots.
+// It is most common to stack allocate HandleScopes using StackHandleScope.
 class PACKED(4) HandleScope {
  public:
   ~HandleScope() {}
@@ -130,6 +132,7 @@
 
  private:
   template<size_t kNumReferences> friend class StackHandleScope;
+
   DISALLOW_COPY_AND_ASSIGN(HandleScope);
 };
 
@@ -152,7 +155,7 @@
 
 // Scoped handle storage of a fixed size that is usually stack allocated.
 template<size_t kNumReferences>
-class PACKED(4) StackHandleScope : public HandleScope {
+class PACKED(4) StackHandleScope FINAL : public HandleScope {
  public:
   explicit StackHandleScope(Thread* self);
   ~StackHandleScope();
@@ -181,20 +184,29 @@
   template<class T>
   Handle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetReference(pos_, object);
-    return Handle<T>(GetHandle(pos_++));
+    Handle<T> h(GetHandle(pos_));
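+    // The pos_ increment is deliberately separated from the GetHandle() call to make the
+    // evaluation order explicit.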
+    pos_++;
+    return h;
   }
 
   template<class T>
   HandleWrapper<T> NewHandleWrapper(T** object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetReference(pos_, *object);
-    Handle<T> h(GetHandle(pos_++));
+    Handle<T> h(GetHandle(pos_));
+    pos_++;
     return HandleWrapper<T>(object, h);
   }
 
  private:
-  // references_storage_ needs to be first so that it matches the address of references_.
+  // references_storage_ needs to be first so that it appears at the same location as
+  // HandleScope::references_.
   StackReference<mirror::Object> references_storage_[kNumReferences];
+
+  // The thread on whose handle scope list this scope is linked. The scope pushes and pops itself
+  // on this thread's list.
   Thread* const self_;
+
+  // The position at which the next new handle will be created.
   size_t pos_;
 
   template<size_t kNumRefs> friend class StackHandleScope;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 33339f8..7e3b6ba 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -52,7 +52,6 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "os.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index 449817a..a33a981 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -18,6 +18,7 @@
 
 #include "common_runtime_test.h"
 #include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 6e10a4c..dce1c15 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -181,7 +181,7 @@
 // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
 // test-art-host-run-test-interpreter-018-stack-overflow
 // test-art-host-run-test-interpreter-107-int-math2
-static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86StackOverflowReservedBytes = (kIsDebugBuild ? 32 : 24) * KB;
 static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
 
 static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index f459b59..f4eaa61 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -18,6 +18,7 @@
 
 #include <sys/uio.h>
 
+#include "arch/context.h"
 #include "atomic.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -34,7 +35,6 @@
 #if !defined(ART_USE_PORTABLE_COMPILER)
 #include "entrypoints/quick/quick_entrypoints.h"
 #endif
-#include "object_utils.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -93,16 +93,17 @@
     method->ClearIsPortableCompiled();
   }
   if (!method->IsResolutionMethod()) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     if (quick_code == GetQuickToInterpreterBridge() ||
-        quick_code == GetQuickToInterpreterBridgeTrampoline(Runtime::Current()->GetClassLinker()) ||
-        (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker()) &&
+        quick_code == class_linker->GetQuickToInterpreterBridgeTrampoline() ||
+        (quick_code == class_linker->GetQuickResolutionTrampoline() &&
          Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly()
          && !method->IsNative() && !method->IsProxyMethod())) {
       if (kIsDebugBuild) {
         if (quick_code == GetQuickToInterpreterBridge()) {
           DCHECK(portable_code == GetPortableToInterpreterBridge());
-        } else if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
-          DCHECK(portable_code == GetPortableResolutionTrampoline(Runtime::Current()->GetClassLinker()));
+        } else if (quick_code == class_linker->GetQuickResolutionTrampoline()) {
+          DCHECK(portable_code == class_linker->GetPortableResolutionTrampoline());
         }
       }
       DCHECK(!method->IsNative()) << PrettyMethod(method);
@@ -133,8 +134,8 @@
       new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
       new_quick_code = class_linker->GetQuickOatCodeFor(method);
     } else {
-      new_portable_code = GetPortableResolutionTrampoline(class_linker);
-      new_quick_code = GetQuickResolutionTrampoline(class_linker);
+      new_portable_code = class_linker->GetPortableResolutionTrampoline();
+      new_quick_code = class_linker->GetQuickResolutionTrampoline();
     }
   } else {  // !uninstall
     if ((interpreter_stubs_installed_ || forced_interpret_only_ || IsDeoptimized(method)) &&
@@ -146,20 +147,17 @@
       // class, all its static methods code will be set to the instrumentation entry point.
       // For more details, see ClassLinker::FixupStaticTrampolines.
       if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
-        // Do not overwrite interpreter to prevent from posting method entry/exit events twice.
-        new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
-        new_quick_code = class_linker->GetQuickOatCodeFor(method);
-        DCHECK(new_quick_code != GetQuickToInterpreterBridgeTrampoline(class_linker));
-        if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) {
-          // TODO: portable to quick bridge. Bug: 8196384. We cannot enable the check below as long
-          // as GetPortableToQuickBridge() == GetPortableToInterpreterBridge().
-          // DCHECK(new_portable_code != GetPortableToInterpreterBridge());
+        if (entry_exit_stubs_installed_) {
           new_portable_code = GetPortableToInterpreterBridge();
           new_quick_code = GetQuickInstrumentationEntryPoint();
+        } else {
+          new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+          new_quick_code = class_linker->GetQuickOatCodeFor(method);
+          DCHECK(new_quick_code != class_linker->GetQuickToInterpreterBridgeTrampoline());
         }
       } else {
-        new_portable_code = GetPortableResolutionTrampoline(class_linker);
-        new_quick_code = GetQuickResolutionTrampoline(class_linker);
+        new_portable_code = class_linker->GetPortableResolutionTrampoline();
+        new_quick_code = class_linker->GetQuickResolutionTrampoline();
       }
     }
   }
@@ -175,7 +173,6 @@
   struct InstallStackVisitor : public StackVisitor {
     InstallStackVisitor(Thread* thread, Context* context, uintptr_t instrumentation_exit_pc)
         : StackVisitor(thread, context),  instrumentation_stack_(thread->GetInstrumentationStack()),
-          existing_instrumentation_frames_count_(instrumentation_stack_->size()),
           instrumentation_exit_pc_(instrumentation_exit_pc),
           reached_existing_instrumentation_frames_(false), instrumentation_stack_depth_(0),
           last_return_pc_(0) {
@@ -190,18 +187,10 @@
         last_return_pc_ = 0;
         return true;  // Ignore upcalls.
       }
-      if (m->IsRuntimeMethod()) {
-        if (kVerboseInstrumentation) {
-          LOG(INFO) << "  Skipping runtime method. Frame " << GetFrameId();
-        }
-        last_return_pc_ = GetReturnPc();
-        return true;  // Ignore unresolved methods since they will be instrumented after resolution.
-      }
-      if (kVerboseInstrumentation) {
-        LOG(INFO) << "  Installing exit stub in " << DescribeLocation();
-      }
       if (GetCurrentQuickFrame() == NULL) {
-        InstrumentationStackFrame instrumentation_frame(GetThisObject(), m, 0, GetFrameId(), false);
+        bool interpreter_frame = !m->IsPortableCompiled();
+        InstrumentationStackFrame instrumentation_frame(GetThisObject(), m, 0, GetFrameId(),
+                                                        interpreter_frame);
         if (kVerboseInstrumentation) {
           LOG(INFO) << "Pushing shadow frame " << instrumentation_frame.Dump();
         }
@@ -209,6 +198,32 @@
         return true;  // Continue.
       }
       uintptr_t return_pc = GetReturnPc();
+      if (m->IsRuntimeMethod()) {
+        if (return_pc == instrumentation_exit_pc_) {
+          if (kVerboseInstrumentation) {
+            LOG(INFO) << "  Handling quick to interpreter transition. Frame " << GetFrameId();
+          }
+          CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size());
+          const InstrumentationStackFrame& frame = instrumentation_stack_->at(instrumentation_stack_depth_);
+          CHECK(frame.interpreter_entry_);
+          // This is an interpreter frame, so the method enter event must already have been
+          // reported. However, we need to push a DEX pc onto the dex_pcs_ list to match the size
+          // of the instrumentation stack. Since we won't report method entry here, we can safely
+          // push any DEX pc.
+          dex_pcs_.push_back(0);
+          last_return_pc_ = frame.return_pc_;
+          ++instrumentation_stack_depth_;
+          return true;
+        } else {
+          if (kVerboseInstrumentation) {
+            LOG(INFO) << "  Skipping runtime method. Frame " << GetFrameId();
+          }
+          last_return_pc_ = GetReturnPc();
+          return true;  // Ignore unresolved methods since they will be instrumented after resolution.
+        }
+      }
+      if (kVerboseInstrumentation) {
+        LOG(INFO) << "  Installing exit stub in " << DescribeLocation();
+      }
       if (return_pc == instrumentation_exit_pc_) {
         // We've reached a frame which has already been installed with instrumentation exit stub.
         // We should have already installed instrumentation on previous frames.
@@ -231,8 +246,15 @@
           LOG(INFO) << "Pushing frame " << instrumentation_frame.Dump();
         }
 
-        // Insert frame before old ones so we do not corrupt the instrumentation stack.
-        auto it = instrumentation_stack_->end() - existing_instrumentation_frames_count_;
+        // Insert frame at the right position so we do not corrupt the instrumentation stack.
+        // Instrumentation stack frames are in descending frame id order.
+        auto it = instrumentation_stack_->begin();
+        for (auto end = instrumentation_stack_->end(); it != end; ++it) {
+          const InstrumentationStackFrame& current = *it;
+          if (instrumentation_frame.frame_id_ >= current.frame_id_) {
+            break;
+          }
+        }
         instrumentation_stack_->insert(it, instrumentation_frame);
         SetReturnPc(instrumentation_exit_pc_);
       }
@@ -243,7 +265,6 @@
     }
     std::deque<InstrumentationStackFrame>* const instrumentation_stack_;
     std::vector<InstrumentationStackFrame> shadow_stack_;
-    const size_t existing_instrumentation_frames_count_;
     std::vector<uint32_t> dex_pcs_;
     const uintptr_t instrumentation_exit_pc_;
     bool reached_existing_instrumentation_frames_;
@@ -275,7 +296,9 @@
       }
       uint32_t dex_pc = visitor.dex_pcs_.back();
       visitor.dex_pcs_.pop_back();
-      instrumentation->MethodEnterEvent(thread, (*isi).this_object_, (*isi).method_, dex_pc);
+      if (!isi->interpreter_entry_) {
+        instrumentation->MethodEnterEvent(thread, (*isi).this_object_, (*isi).method_, dex_pc);
+      }
     }
   }
   thread->VerifyStack();
@@ -570,22 +593,25 @@
       new_portable_code = GetPortableToInterpreterBridge();
       new_quick_code = GetQuickToInterpreterBridge();
       new_have_portable_code = false;
-    } else if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker()) ||
-        quick_code == GetQuickToInterpreterBridgeTrampoline(Runtime::Current()->GetClassLinker()) ||
-        quick_code == GetQuickToInterpreterBridge()) {
-      DCHECK((portable_code == GetPortableResolutionTrampoline(Runtime::Current()->GetClassLinker())) ||
-             (portable_code == GetPortableToInterpreterBridge()));
-      new_portable_code = portable_code;
-      new_quick_code = quick_code;
-      new_have_portable_code = have_portable_code;
-    } else if (entry_exit_stubs_installed_) {
-      new_quick_code = GetQuickInstrumentationEntryPoint();
-      new_portable_code = GetPortableToInterpreterBridge();
-      new_have_portable_code = false;
     } else {
-      new_portable_code = portable_code;
-      new_quick_code = quick_code;
-      new_have_portable_code = have_portable_code;
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      if (quick_code == class_linker->GetQuickResolutionTrampoline() ||
+          quick_code == class_linker->GetQuickToInterpreterBridgeTrampoline() ||
+          quick_code == GetQuickToInterpreterBridge()) {
+        DCHECK((portable_code == class_linker->GetPortableResolutionTrampoline()) ||
+               (portable_code == GetPortableToInterpreterBridge()));
+        new_portable_code = portable_code;
+        new_quick_code = quick_code;
+        new_have_portable_code = have_portable_code;
+      } else if (entry_exit_stubs_installed_) {
+        new_quick_code = GetQuickInstrumentationEntryPoint();
+        new_portable_code = GetPortableToInterpreterBridge();
+        new_have_portable_code = false;
+      } else {
+        new_portable_code = portable_code;
+        new_quick_code = quick_code;
+        new_have_portable_code = have_portable_code;
+      }
     }
   }
   UpdateEntrypoints(method, new_quick_code, new_portable_code, new_have_portable_code);
@@ -606,7 +632,7 @@
   CHECK(!already_deoptimized) << "Method " << PrettyMethod(method) << " is already deoptimized";
 
   if (!interpreter_stubs_installed_) {
-    UpdateEntrypoints(method, GetQuickToInterpreterBridge(), GetPortableToInterpreterBridge(),
+    UpdateEntrypoints(method, GetQuickInstrumentationEntryPoint(), GetPortableToInterpreterBridge(),
                       false);
 
     // Install instrumentation exit stub and instrumentation frames. We may already have installed
@@ -639,8 +665,9 @@
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     if (method->IsStatic() && !method->IsConstructor() &&
         !method->GetDeclaringClass()->IsInitialized()) {
-      UpdateEntrypoints(method, GetQuickResolutionTrampoline(class_linker),
-                        GetPortableResolutionTrampoline(class_linker), false);
+      // TODO: we're updating to entrypoints in the image here, we can avoid the trampoline.
+      UpdateEntrypoints(method, class_linker->GetQuickResolutionTrampoline(),
+                        class_linker->GetPortableResolutionTrampoline(), false);
     } else {
       bool have_portable_code = false;
       const void* quick_code = class_linker->GetQuickOatCodeFor(method);
@@ -720,8 +747,9 @@
   if (LIKELY(!instrumentation_stubs_installed_)) {
     const void* code = method->GetEntryPointFromQuickCompiledCode();
     DCHECK(code != nullptr);
-    if (LIKELY(code != GetQuickResolutionTrampoline(runtime->GetClassLinker())) &&
-        LIKELY(code != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker())) &&
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    if (LIKELY(code != class_linker->GetQuickResolutionTrampoline()) &&
+        LIKELY(code != class_linker->GetQuickToInterpreterBridgeTrampoline()) &&
         LIKELY(code != GetQuickToInterpreterBridge())) {
       return code;
     }
@@ -844,7 +872,9 @@
                                                                    frame_id, interpreter_entry);
   stack->push_front(instrumentation_frame);
 
-  MethodEnterEvent(self, this_object, method, 0);
+  if (!interpreter_entry) {
+    MethodEnterEvent(self, this_object, method, 0);
+  }
 }
 
 TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
@@ -875,7 +905,9 @@
   //       return_pc.
   uint32_t dex_pc = DexFile::kDexNoIndex;
   mirror::Object* this_object = instrumentation_frame.this_object_;
-  MethodExitEvent(self, this_object, instrumentation_frame.method_, dex_pc, return_value);
+  if (!instrumentation_frame.interpreter_entry_) {
+    MethodExitEvent(self, this_object, instrumentation_frame.method_, dex_pc, return_value);
+  }
 
   // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get
   // back to an upcall.
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 5995d9e..d462e14 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -19,6 +19,8 @@
 #include "common_runtime_test.h"
 #include "mirror/object.h"
 #include "handle_scope-inl.h"
+#include "mirror/string.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 729444e..e3068b3 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -95,11 +95,11 @@
     jint newValue = args[4];
     bool success;
     if (Runtime::Current()->IsActiveTransaction()) {
-      success = obj->CasFieldWeakSequentiallyConsistent32<true>(MemberOffset(offset),
-                                                                expectedValue, newValue);
+      success = obj->CasFieldStrongSequentiallyConsistent32<true>(MemberOffset(offset),
+                                                                  expectedValue, newValue);
     } else {
-      success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset),
-                                                                 expectedValue, newValue);
+      success = obj->CasFieldStrongSequentiallyConsistent32<false>(MemberOffset(offset),
+                                                                   expectedValue, newValue);
     }
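+    // A strong CAS is required here: the result is returned to the caller, and a weak CAS may
+    // fail spuriously even when the field holds the expected value.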
     result->SetZ(success ? JNI_TRUE : JNI_FALSE);
   } else if (name == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
@@ -356,6 +356,7 @@
          shadow_frame.GetMethod()->GetDeclaringClass()->IsProxyClass());
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
+  shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
   if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 9f04b90..b35da0c 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -15,6 +15,8 @@
  */
 
 #include "interpreter_common.h"
+
+#include "field_helper.h"
 #include "mirror/array-inl.h"
 
 namespace art {
@@ -45,6 +47,7 @@
       return false;
     }
   }
+  f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
   // Report this field access to instrumentation if needed.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
@@ -221,6 +224,7 @@
       return false;
     }
   }
+  f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
   uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
   // Report this field access to instrumentation if needed. Since we only have the offset of
   // the field from the base of the object, we need to look for it first.
@@ -757,40 +761,64 @@
   }
 }
 
+// Helper function to deal with class loading in an unstarted runtime.
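+// If abort_if_not_found is set, a missing class aborts the current transaction; if
+// initialize_class is set, the found class is initialized via EnsureInitialized before returning.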
+static void UnstartedRuntimeFindClass(Thread* self, Handle<mirror::String> className,
+                                      Handle<mirror::ClassLoader> class_loader, JValue* result,
+                                      const std::string& method_name, bool initialize_class,
+                                      bool abort_if_not_found)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  CHECK(className.Get() != nullptr);
+  std::string descriptor(DotToDescriptor(className->ToModifiedUtf8().c_str()));
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+
+  Class* found = class_linker->FindClass(self, descriptor.c_str(), class_loader);
+  if (found == nullptr && abort_if_not_found) {
+    if (!self->IsExceptionPending()) {
+      AbortTransaction(self, "%s failed in un-started runtime for class: %s",
+                       method_name.c_str(), PrettyDescriptor(descriptor).c_str());
+    }
+    return;
+  }
+  if (found != nullptr && initialize_class) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(found));
+    if (!class_linker->EnsureInitialized(h_class, true, true)) {
+      CHECK(self->IsExceptionPending());
+      return;
+    }
+  }
+  result->SetL(found);
+}
+
 static void UnstartedRuntimeInvoke(Thread* self, MethodHelper& mh,
                                    const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
                                    JValue* result, size_t arg_offset) {
   // In a runtime that's not started we intercept certain methods to avoid complicated dependency
   // problems in core libraries.
   std::string name(PrettyMethod(shadow_frame->GetMethod()));
-  if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)"
-      || name == "java.lang.Class java.lang.VMClassLoader.loadClass(java.lang.String, boolean)") {
-    // TODO Class#forName should actually call Class::EnsureInitialized always. Support for the
-    // other variants that take more arguments should also be added.
-    std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
-
-    // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
-    Class* found = Runtime::Current()->GetClassLinker()->FindClass(
-        self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
-    if (found == NULL) {
-      if (!self->IsExceptionPending()) {
-        AbortTransaction(self, "Class.forName failed in un-started runtime for class: %s",
-                         PrettyDescriptor(descriptor).c_str());
-      }
-      return;
-    }
-    result->SetL(found);
+  if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
+    // TODO: Support for the other variants that take more arguments should also be added.
+    mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset)->AsString();
+    StackHandleScope<1> hs(self);
+    Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
+    UnstartedRuntimeFindClass(self, h_class_name, NullHandle<mirror::ClassLoader>(), result, name,
+                              true, true);
+  } else if (name == "java.lang.Class java.lang.VMClassLoader.loadClass(java.lang.String, boolean)") {
+    mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset)->AsString();
+    StackHandleScope<1> hs(self);
+    Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
+    UnstartedRuntimeFindClass(self, h_class_name, NullHandle<mirror::ClassLoader>(), result, name,
+                              false, true);
+  } else if (name == "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") {
+    mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
+    mirror::ClassLoader* class_loader =
+        down_cast<mirror::ClassLoader*>(shadow_frame->GetVRegReference(arg_offset));
+    StackHandleScope<2> hs(self);
+    Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
+    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
+    UnstartedRuntimeFindClass(self, h_class_name, h_class_loader, result, name, false, false);
   } else if (name == "java.lang.Class java.lang.Void.lookupType()") {
     result->SetL(Runtime::Current()->GetClassLinker()->FindPrimitiveClass('V'));
-  } else if (name == "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") {
-    StackHandleScope<1> hs(self);
-    Handle<ClassLoader> class_loader(
-        hs.NewHandle(down_cast<mirror::ClassLoader*>(shadow_frame->GetVRegReference(arg_offset))));
-    std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset + 1)->AsString()->ToModifiedUtf8().c_str()));
-
-    Class* found = Runtime::Current()->GetClassLinker()->FindClass(self, descriptor.c_str(),
-                                                                   class_loader);
-    result->SetL(found);
   } else if (name == "java.lang.Object java.lang.Class.newInstance()") {
     Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
     ArtMethod* c = klass->FindDeclaredDirectMethod("<init>", "()V");
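
The three class-lookup intercepts above now share UnstartedRuntimeFindClass and differ only
in their (initialize_class, abort_if_not_found) arguments: Class.forName passes (true, true),
VMClassLoader.loadClass passes (false, true), and VMClassLoader.findLoadedClass passes
(false, false). As a minimal sketch of the remaining TODO -- hypothetical, not part of this
change -- the three-argument Class.forName overload could slot into the same dispatch, with
the extra vregs carrying the "initialize" flag and the class loader:

    } else if (name ==
        "java.lang.Class java.lang.Class.forName(java.lang.String, boolean, java.lang.ClassLoader)") {
      mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset)->AsString();
      bool initialize = shadow_frame->GetVReg(arg_offset + 1) != 0;
      mirror::ClassLoader* class_loader =
          down_cast<mirror::ClassLoader*>(shadow_frame->GetVRegReference(arg_offset + 2));
      StackHandleScope<2> hs(self);
      Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
      Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
      UnstartedRuntimeFindClass(self, h_class_name, h_class_loader, result, name, initialize, true);
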
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 5277330..1bcd27e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -27,9 +27,10 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "handle_scope-inl.h"
+#include "method_helper-inl.h"
 #include "nth_caller_visitor.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -39,7 +40,6 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
-#include "object_utils.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index cb4868c..abd4b44 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -536,6 +536,7 @@
     if (UNLIKELY(obj == NULL)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
+      obj->GetClass()->AssertInitializedOrInitializingInThread(self);
       // Don't allow finalizable objects to be allocated during a transaction since these can't be
       // finalized without a started runtime.
       if (transaction_active && obj->GetClass()->IsFinalizable()) {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bdf2a20..c635648 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -449,6 +449,7 @@
         if (UNLIKELY(obj == NULL)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
+          obj->GetClass()->AssertInitializedOrInitializingInThread(self);
           // Don't allow finalizable objects to be allocated during a transaction since these can't
           // be finalized without a started runtime.
           if (transaction_active && obj->GetClass()->IsFinalizable()) {
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 29d3c8a..ad18d8a 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -16,6 +16,7 @@
 
 #include "object_registry.h"
 
+#include "mirror/class.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index e1a6875..f0314a3 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -17,20 +17,21 @@
 #ifndef ART_RUNTIME_JDWP_OBJECT_REGISTRY_H_
 #define ART_RUNTIME_JDWP_OBJECT_REGISTRY_H_
 
+#include <jni.h>
 #include <stdint.h>
 
 #include <map>
 
 #include "jdwp/jdwp.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 namespace art {
 
+namespace mirror {
+  class Object;
+  class Class;
+}  // namespace mirror
+
 struct ObjectRegistryEntry {
   // Is jni_reference a weak global or a regular global reference?
   jobjectRefType jni_reference_type;
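
The object_registry.h hunk above swaps five mirror includes for two forward declarations:
the header only traffics in pointers, so the names of mirror::Object and mirror::Class
suffice. A compilable illustration of the pattern (names hypothetical):

    namespace art {
    namespace mirror {
      class Object;  // name-only declaration: enough for pointer members and parameters
    }  // namespace mirror

    class ExampleRegistry {
     public:
      void Add(mirror::Object* obj);  // declaration needs only the type's name
     private:
      mirror::Object* last_;          // pointers to incomplete types are fine
    };
    }  // namespace art

The matching .cc file then includes the full definition before any member of the mirror type
is actually touched, which is exactly what object_registry.cc now does with "mirror/class.h".
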
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 845691d..f9c7ec6 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -41,7 +41,6 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "mirror/throwable.h"
-#include "object_utils.h"
 #include "parsed_options.h"
 #include "reflection.h"
 #include "runtime.h"
@@ -3002,7 +3001,7 @@
     LOG(ERROR) << "Bad JNI version passed to CreateJavaVM: " << args->version;
     return JNI_EVERSION;
   }
-  Runtime::Options options;
+  RuntimeOptions options;
   for (int i = 0; i < args->nOptions; ++i) {
     JavaVMOption* option = &args->options[i];
     options.push_back(std::make_pair(std::string(option->optionString), option->extraInfo));
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 8ef1cb6..7c7e60c 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -19,6 +19,7 @@
 #include "common_compiler_test.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/string-inl.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 
 namespace art {
diff --git a/runtime/method_helper-inl.h b/runtime/method_helper-inl.h
new file mode 100644
index 0000000..4f95a28
--- /dev/null
+++ b/runtime/method_helper-inl.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_HELPER_INL_H_
+#define ART_RUNTIME_METHOD_HELPER_INL_H_
+
+#include "method_helper.h"
+
+#include "class_linker.h"
+#include "mirror/object_array.h"
+#include "runtime.h"
+#include "thread-inl.h"
+
+namespace art {
+
+inline mirror::Class* MethodHelper::GetClassFromTypeIdx(uint16_t type_idx, bool resolve) {
+  mirror::ArtMethod* method = GetMethod();
+  mirror::Class* type = method->GetDexCacheResolvedTypes()->Get(type_idx);
+  if (type == nullptr && resolve) {
+    type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
+  }
+  return type;
+}
+
+inline mirror::Class* MethodHelper::GetReturnType(bool resolve) {
+  mirror::ArtMethod* method = GetMethod();
+  const DexFile* dex_file = method->GetDexFile();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(method->GetDexMethodIndex());
+  const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
+  uint16_t return_type_idx = proto_id.return_type_idx_;
+  return GetClassFromTypeIdx(return_type_idx, resolve);
+}
+
+inline mirror::String* MethodHelper::ResolveString(uint32_t string_idx) {
+  mirror::ArtMethod* method = GetMethod();
+  mirror::String* s = method->GetDexCacheStrings()->Get(string_idx);
+  if (UNLIKELY(s == nullptr)) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+    s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx,
+                                                            dex_cache);
+  }
+  return s;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_HELPER_INL_H_
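
GetClassFromTypeIdx above has two modes worth noting: with resolve == false it only peeks at
the dex cache and may return nullptr without raising anything, while with resolve == true a
nullptr return implies a pending exception (the CHECK enforces this). A hedged usage sketch,
assuming a MethodHelper mh built around a resolved method:

    mirror::Class* cached = mh.GetClassFromTypeIdx(type_idx, false);  // peek only
    if (cached == nullptr) {
      // Not in the dex cache yet; nothing was thrown. Resolve for real.
      mirror::Class* resolved = mh.GetClassFromTypeIdx(type_idx, true);
      if (resolved == nullptr) {
        DCHECK(Thread::Current()->IsExceptionPending());  // resolution failed
        return false;  // let the caller propagate the pending exception
      }
    }
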
diff --git a/runtime/method_helper.cc b/runtime/method_helper.cc
new file mode 100644
index 0000000..1bd2f90
--- /dev/null
+++ b/runtime/method_helper.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "method_helper-inl.h"
+
+#include "class_linker.h"
+#include "dex_file-inl.h"
+#include "handle_scope-inl.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/dex_cache.h"
+#include "runtime.h"
+
+namespace art {
+
+mirror::String* MethodHelper::GetNameAsString(Thread* self) {
+  const DexFile* dex_file = method_->GetDexFile();
+  mirror::ArtMethod* method = method_->GetInterfaceMethodIfProxy();
+  uint32_t dex_method_idx = method->GetDexMethodIndex();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+  return Runtime::Current()->GetClassLinker()->ResolveString(*dex_file, method_id.name_idx_,
+                                                             dex_cache);
+}
+
+bool MethodHelper::HasSameNameAndSignature(MethodHelper* other) {
+  const DexFile* dex_file = method_->GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetMethod()->GetDexMethodIndex());
+  if (method_->GetDexCache() == other->method_->GetDexCache()) {
+    const DexFile::MethodId& other_mid =
+        dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
+    return mid.name_idx_ == other_mid.name_idx_ && mid.proto_idx_ == other_mid.proto_idx_;
+  }
+  const DexFile* other_dex_file = other->method_->GetDexFile();
+  const DexFile::MethodId& other_mid =
+      other_dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
+  if (!DexFileStringEquals(dex_file, mid.name_idx_, other_dex_file, other_mid.name_idx_)) {
+    return false;  // Name mismatch.
+  }
+  return dex_file->GetMethodSignature(mid) == other_dex_file->GetMethodSignature(other_mid);
+}
+
+bool MethodHelper::HasSameSignatureWithDifferentClassLoaders(MethodHelper* other) {
+  if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
+    return false;
+  }
+  const DexFile::TypeList* types = method_->GetParameterTypeList();
+  const DexFile::TypeList* other_types = other->method_->GetParameterTypeList();
+  if (types == nullptr) {
+    return (other_types == nullptr) || (other_types->Size() == 0);
+  } else if (UNLIKELY(other_types == nullptr)) {
+    return types->Size() == 0;
+  }
+  uint32_t num_types = types->Size();
+  if (UNLIKELY(num_types != other_types->Size())) {
+    return false;
+  }
+  for (uint32_t i = 0; i < num_types; ++i) {
+    mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+    mirror::Class* other_param_type =
+        other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
+    if (UNLIKELY(param_type != other_param_type)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+uint32_t MethodHelper::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::ArtMethod* method = GetMethod();
+  const DexFile* dexfile = method->GetDexFile();
+  if (dexfile == &other_dexfile) {
+    return method->GetDexMethodIndex();
+  }
+  const DexFile::MethodId& mid = dexfile->GetMethodId(method->GetDexMethodIndex());
+  const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
+  const DexFile::StringId* other_descriptor =
+      other_dexfile.FindStringId(mid_declaring_class_descriptor);
+  if (other_descriptor != nullptr) {
+    const DexFile::TypeId* other_type_id =
+        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
+    if (other_type_id != nullptr) {
+      const char* mid_name = dexfile->GetMethodName(mid);
+      const DexFile::StringId* other_name = other_dexfile.FindStringId(mid_name);
+      if (other_name != nullptr) {
+        uint16_t other_return_type_idx;
+        std::vector<uint16_t> other_param_type_idxs;
+        bool success = other_dexfile.CreateTypeList(
+            dexfile->GetMethodSignature(mid).ToString(), &other_return_type_idx,
+            &other_param_type_idxs);
+        if (success) {
+          const DexFile::ProtoId* other_sig =
+              other_dexfile.FindProtoId(other_return_type_idx, other_param_type_idxs);
+          if (other_sig != nullptr) {
+            const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+                *other_type_id, *other_name, *other_sig);
+            if (other_mid != nullptr) {
+              return other_dexfile.GetIndexForMethodId(*other_mid);
+            }
+          }
+        }
+      }
+    }
+  }
+  return DexFile::kDexNoIndex;
+}
+
+uint32_t MethodHelper::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
+                                                        uint32_t name_and_signature_idx)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::ArtMethod* method = GetMethod();
+  const DexFile* dexfile = method->GetDexFile();
+  const uint32_t dex_method_idx = method->GetDexMethodIndex();
+  const DexFile::MethodId& mid = dexfile->GetMethodId(dex_method_idx);
+  const DexFile::MethodId& name_and_sig_mid = other_dexfile.GetMethodId(name_and_signature_idx);
+  DCHECK_STREQ(dexfile->GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
+  DCHECK_EQ(dexfile->GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
+  if (dexfile == &other_dexfile) {
+    return dex_method_idx;
+  }
+  const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
+  const DexFile::StringId* other_descriptor =
+      other_dexfile.FindStringId(mid_declaring_class_descriptor);
+  if (other_descriptor != nullptr) {
+    const DexFile::TypeId* other_type_id =
+        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
+    if (other_type_id != nullptr) {
+      const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+          *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
+          other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
+      if (other_mid != nullptr) {
+        return other_dexfile.GetIndexForMethodId(*other_mid);
+      }
+    }
+  }
+  return DexFile::kDexNoIndex;
+}
+
+}  // namespace art
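
Both FindDexMethodIndexInOtherDexFile overloads above walk descriptor -> TypeId -> name
StringId -> ProtoId -> MethodId in the other dex file and return DexFile::kDexNoIndex as soon
as any step misses, so callers must treat that sentinel as "no equivalent method". A hedged
usage sketch (other_dex_file is a hypothetical const DexFile*):

    uint32_t other_idx = mh.FindDexMethodIndexInOtherDexFile(*other_dex_file);
    if (other_idx == DexFile::kDexNoIndex) {
      // No MethodId with the same class descriptor, name and signature exists
      // over there; fall back to a slower structural comparison.
    } else {
      const DexFile::MethodId& other_mid = other_dex_file->GetMethodId(other_idx);
      // ... resolve or compare against other_mid ...
    }
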
diff --git a/runtime/method_helper.h b/runtime/method_helper.h
new file mode 100644
index 0000000..62465be
--- /dev/null
+++ b/runtime/method_helper.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_HELPER_H_
+#define ART_RUNTIME_METHOD_HELPER_H_
+
+#include "base/macros.h"
+#include "handle.h"
+#include "mirror/art_method.h"
+#include "primitive.h"
+
+namespace art {
+
+class MethodHelper {
+ public:
+  explicit MethodHelper(Handle<mirror::ArtMethod> m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : method_(m), shorty_(nullptr), shorty_len_(0) {
+    SetMethod(m.Get());
+  }
+
+  void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(new_m != nullptr);
+    SetMethod(new_m);
+    shorty_ = nullptr;
+  }
+
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return method_->GetInterfaceMethodIfProxy();
+  }
+
+  mirror::String* GetNameAsString(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const char* result = shorty_;
+    if (result == nullptr) {
+      result = method_->GetShorty(&shorty_len_);
+      shorty_ = result;
+    }
+    return result;
+  }
+
+  uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (shorty_ == nullptr) {
+      GetShorty();
+    }
+    return shorty_len_;
+  }
+
+  // Counts the number of references in the parameter list of the corresponding method.
+  // Note: This does _not_ include "this" for non-static methods.
+  uint32_t GetNumberOfReferenceArgsWithoutReceiver() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const char* shorty = GetShorty();
+    uint32_t refs = 0;
+    for (uint32_t i = 1; i < shorty_len_ ; ++i) {
+      if (shorty[i] == 'L') {
+        refs++;
+      }
+    }
+
+    return refs;
+  }
+
+  // May cause thread suspension, since GetClassFromTypeIdx calls ResolveType; this has caused
+  // a large number of bugs at call sites.
+  mirror::Class* GetReturnType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t NumArgs() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // "1 +" because the first in Args is the receiver.
+    // "- 1" because we don't count the return type.
+    return (method_->IsStatic() ? 0 : 1) + GetShortyLength() - 1;
+  }
+
+  // Get the primitive type associated with the given parameter.
+  Primitive::Type GetParamPrimitiveType(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CHECK_LT(param, NumArgs());
+    if (GetMethod()->IsStatic()) {
+      param++;  // 0th argument must skip return value at start of the shorty
+    } else if (param == 0) {
+      return Primitive::kPrimNot;
+    }
+    return Primitive::GetType(GetShorty()[param]);
+  }
+
+  // Is the specified parameter a long or double, where parameter 0 is 'this' for instance methods.
+  bool IsParamALongOrDouble(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Primitive::Type type = GetParamPrimitiveType(param);
+    return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+  }
+
+  // Is the specified parameter a reference, where parameter 0 is 'this' for instance methods.
+  bool IsParamAReference(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetParamPrimitiveType(param) == Primitive::kPrimNot;
+  }
+
+  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // The name_and_signature_idx MUST point to a MethodId with the same name and signature in the
+  // other_dexfile, such as the method index used to resolve this method in the other_dexfile.
+  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
+                                            uint32_t name_and_signature_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  // Set the method_ field, for proxy methods looking up the interface method via the resolved
+  // methods table.
+  void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    method_.Assign(method);
+  }
+
+  Handle<mirror::ArtMethod> method_;
+  const char* shorty_;
+  uint32_t shorty_len_;
+
+  DISALLOW_COPY_AND_ASSIGN(MethodHelper);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_HELPER_H_
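
Since MethodHelper now owns a Handle<mirror::ArtMethod>, constructing one requires a live
handle scope; the helper then stays valid across suspension points because the handle tracks
the method if it moves. A minimal construction sketch, assuming a Thread* self and an
ArtMethod* method held under the mutator lock:

    StackHandleScope<1> hs(self);
    MethodHelper mh(hs.NewHandle(method));
    const char* shorty = mh.GetShorty();           // cached after the first call
    size_t num_args = mh.NumArgs();                // receiver (if any) + shorty args
    mirror::Class* ret = mh.GetReturnType(false);  // may be nullptr, never throws
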
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 43bdf49..f3c8250 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,6 +27,11 @@
 namespace art {
 namespace mirror {
 
+inline uint32_t Array::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0);
+}
+
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index f7b5737..63f9860 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -25,7 +25,6 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
-#include "object_utils.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
 #include "utils.h"
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 25a4535..6588b57 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -30,6 +30,9 @@
 
 class MANAGED Array : public Object {
  public:
+  // The size of a java.lang.Class representing an array.
+  static uint32_t ClassSize();
+
   // Allocates an array with the given properties, if fill_usable is true the array will be of at
   // least component_count size, however, if there's usable space at the end of the allocation the
   // array will fill it.
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index 686fded..00bed92 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -20,15 +20,20 @@
 #include "art_field.h"
 
 #include "base/logging.h"
+#include "dex_cache.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jvalue.h"
 #include "object-inl.h"
-#include "object_utils.h"
 #include "primitive.h"
 
 namespace art {
 namespace mirror {
 
+inline uint32_t ArtField::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength + 6;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0);
+}
+
 inline Class* ArtField::GetDeclaringClass() {
   Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_));
   DCHECK(result != NULL);
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index f2729f6..da21dfe 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -20,7 +20,6 @@
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 502cec7..741c6eb 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -19,22 +19,33 @@
 
 #include <jni.h>
 
-#include "class.h"
 #include "modifiers.h"
 #include "object.h"
 #include "object_callbacks.h"
+#include "primitive.h"
 #include "read_barrier.h"
 
 namespace art {
 
 struct ArtFieldOffsets;
+class DexFile;
 class ScopedObjectAccessAlreadyRunnable;
 
 namespace mirror {
 
+class DexCache;
+
 // C++ mirror of java.lang.reflect.ArtField
-class MANAGED ArtField : public Object {
+class MANAGED ArtField FINAL : public Object {
  public:
+  // Size of java.lang.reflect.ArtField.class.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.reflect.ArtField not including its value array.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ArtField);
+  }
+
   static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
                                       jobject jlr_field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -143,11 +154,17 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Primitive::Type GetTypeAsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
@@ -169,11 +186,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(ArtField);
 };
 
-class MANAGED ArtFieldClass : public Class {
- private:
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ArtFieldClass);
-};
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 8fcacc2..01b05a6 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -19,16 +19,33 @@
 
 #include "art_method.h"
 
+#include "class_linker.h"
+#include "dex_cache.h"
 #include "dex_file.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "method_helper.h"
+#include "object-inl.h"
 #include "object_array.h"
 #include "oat.h"
 #include "quick/quick_method_frame_info.h"
+#include "read_barrier-inl.h"
 #include "runtime-inl.h"
 
 namespace art {
 namespace mirror {
 
+inline uint32_t ArtMethod::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength + 8;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0);
+}
+
+template<ReadBarrierOption kReadBarrierOption>
+inline Class* ArtMethod::GetJavaLangReflectArtMethod() {
+  DCHECK(java_lang_reflect_ArtMethod_ != nullptr);
+  return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(
+      &java_lang_reflect_ArtMethod_);
+}
+
 inline Class* ArtMethod::GetDeclaringClass() {
   Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_));
   DCHECK(result != NULL) << this;
@@ -122,8 +139,8 @@
     return;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  if (code == GetQuickResolutionTrampoline(class_linker) ||
-      code == GetQuickToInterpreterBridgeTrampoline(class_linker)) {
+  if (code == class_linker->GetQuickResolutionTrampoline() ||
+      code == class_linker->GetQuickToInterpreterBridgeTrampoline()) {
     return;
   }
   DCHECK(IsWithinQuickCode(pc))
@@ -162,7 +179,7 @@
   // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native method
   // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
   // for non-native methods.
-  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  DCHECK(entry_point != runtime->GetClassLinker()->GetQuickToInterpreterBridgeTrampoline());
   if (UNLIKELY(entry_point == GetQuickToInterpreterBridge()) ||
       UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
     return nullptr;
@@ -289,7 +306,7 @@
   // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native method
   // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
   // for non-native methods. And we really shouldn't see a failure for non-native methods here.
-  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  DCHECK(entry_point != runtime->GetClassLinker()->GetQuickToInterpreterBridgeTrampoline());
   CHECK(entry_point != GetQuickToInterpreterBridge());
 
   if (UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 4821e29..167f848 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -16,6 +16,7 @@
 
 #include "art_method.h"
 
+#include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/stringpiece.h"
@@ -26,12 +27,12 @@
 #include "interpreter/interpreter.h"
 #include "jni_internal.h"
 #include "mapping_table.h"
-#include "object-inl.h"
-#include "object_array.h"
+#include "method_helper.h"
 #include "object_array-inl.h"
+#include "object_array.h"
+#include "object-inl.h"
 #include "scoped_thread_state_change.h"
 #include "string.h"
-#include "object_utils.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -355,14 +356,6 @@
   self->PopManagedStackFragment(fragment);
 }
 
-bool ArtMethod::IsRegistered() {
-  void* native_method =
-      GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_));
-  CHECK(native_method != nullptr);
-  void* jni_stub = GetJniDlsymLookupStub();
-  return native_method != jni_stub;
-}
-
 void ArtMethod::RegisterNative(Thread* self, const void* native_method, bool is_fast) {
   DCHECK(Thread::Current() == self);
   CHECK(IsNative()) << PrettyMethod(this);
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index a55c48b..081bee1 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -17,21 +17,19 @@
 #ifndef ART_RUNTIME_MIRROR_ART_METHOD_H_
 #define ART_RUNTIME_MIRROR_ART_METHOD_H_
 
-#include "class.h"
 #include "dex_file.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
 #include "object_callbacks.h"
 #include "quick/quick_method_frame_info.h"
-#include "read_barrier.h"
+#include "read_barrier_option.h"
 
 namespace art {
 
 struct ArtMethodOffsets;
 struct ConstructorMethodOffsets;
 union JValue;
-struct MethodClassOffsets;
 class MethodHelper;
 class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
@@ -39,14 +37,20 @@
 
 namespace mirror {
 
-class StaticStorageBase;
-
 typedef void (EntryPointFromInterpreter)(Thread* self, MethodHelper& mh,
     const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame, JValue* result);
 
-// C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
-class MANAGED ArtMethod : public Object {
+// C++ mirror of java.lang.reflect.ArtMethod.
+class MANAGED ArtMethod FINAL : public Object {
  public:
+  // Size of java.lang.reflect.ArtMethod.class.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.reflect.ArtMethod not including its value array.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ArtMethod);
+  }
+
   static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                         jobject jlr_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -357,8 +361,6 @@
     return kPointerSize;
   }
 
-  bool IsRegistered() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void RegisterNative(Thread* self, const void* native_method, bool is_fast)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -411,11 +413,7 @@
   static void SetClass(Class* java_lang_reflect_ArtMethod);
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  static Class* GetJavaLangReflectArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(java_lang_reflect_ArtMethod_ != nullptr);
-    return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(
-        &java_lang_reflect_ArtMethod_);
-  }
+  static Class* GetJavaLangReflectArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void ResetClass();
 
@@ -423,27 +421,45 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t unused_length;
     return GetShorty(&unused_length);
   }
+
   const char* GetShorty(uint32_t* out_length) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const Signature GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::CodeItem* GetCodeItem() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool IsResolvedTypeIdx(uint16_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   int32_t GetLineNumFromDexPC(uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::ProtoId& GetPrototype() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::TypeList* GetParameterTypeList() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetDeclaringClassSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   uint16_t GetClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::ClassDef& GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetReturnTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::ClassLoader* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* GetInterfaceMethodIfProxy() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
@@ -505,11 +521,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(ArtMethod);
 };
 
-class MANAGED ArtMethodClass : public Class {
- private:
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ArtMethodClass);
-};
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 451235c..329a984 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -19,8 +19,8 @@
 
 #include "class.h"
 
-#include "art_field.h"
-#include "art_method.h"
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "class_loader.h"
 #include "common_throws.h"
@@ -29,6 +29,8 @@
 #include "gc/heap-inl.h"
 #include "iftable.h"
 #include "object_array-inl.h"
+#include "read_barrier-inl.h"
+#include "reference-inl.h"
 #include "runtime.h"
 #include "string.h"
 
@@ -148,6 +150,23 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), new_imtable);
 }
 
+inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i) {
+  uint32_t offset = EmbeddedImTableOffset().Uint32Value() + i * sizeof(ImTableEntry);
+  return GetFieldObject<mirror::ArtMethod>(MemberOffset(offset));
+}
+
+inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method) {
+  uint32_t offset = EmbeddedImTableOffset().Uint32Value() + i * sizeof(ImTableEntry);
+  SetFieldObject<false>(MemberOffset(offset), method);
+  CHECK(method == GetImTable()->Get(i));
+}
+
+inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method) {
+  uint32_t offset = EmbeddedVTableOffset().Uint32Value() + i * sizeof(VTableEntry);
+  SetFieldObject<false>(MemberOffset(offset), method);
+  CHECK(method == GetVTableDuringLinking()->Get(i));
+}
+
 inline bool Class::Implements(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(klass->IsInterface()) << PrettyClass(this);
@@ -373,7 +392,8 @@
 
 inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)));
+  DCHECK((IsRetired() && new_sfields == nullptr) ||
+         (NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_))));
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
 }
 
@@ -435,9 +455,9 @@
 
 template<VerifyObjectFlags kVerifyFlags>
 inline uint32_t Class::GetAccessFlags() {
-  // Check class is loaded or this is java.lang.String that has a
+  // Check that the class is loaded/retired, or is java.lang.String, which has a
   // circularity issue during loading the names of its members
-  DCHECK(IsLoaded<kVerifyFlags>() ||
+  DCHECK(IsIdxLoaded<kVerifyFlags>() || IsRetired<kVerifyFlags>() ||
          IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>() ||
          this == String::GetJavaLangString() ||
          this == ArtField::GetJavaLangReflectArtField() ||
@@ -503,12 +523,63 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
+inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
+                                        uint32_t num_vtable_entries,
+                                        uint32_t num_32bit_static_fields,
+                                        uint32_t num_64bit_static_fields,
+                                        uint32_t num_ref_static_fields) {
+  // Space used by java.lang.Class and its instance fields.
+  uint32_t size = sizeof(Class);
+  // Space used by embedded tables.
+  if (has_embedded_tables) {
+    uint32_t embedded_imt_size = kImtSize * sizeof(ImTableEntry);
+    uint32_t embedded_vtable_size = num_vtable_entries * sizeof(VTableEntry);
+    size += embedded_imt_size + embedded_vtable_size;
+  }
+  // Space used by reference statics.
+  size += num_ref_static_fields * sizeof(HeapReference<Object>);
+  // Possible pad for alignment.
+  if (((size & 7) != 0) && (num_64bit_static_fields > 0) && (num_32bit_static_fields == 0)) {
+    size += sizeof(uint32_t);
+  }
+  // Space used for primitive static fields.
+  size += (num_32bit_static_fields * sizeof(uint32_t)) +
+      (num_64bit_static_fields * sizeof(uint64_t));
+  return size;
+}
+
 template <bool kVisitClass, typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   // Visit the static fields first so that we don't overwrite the SFields / IFields instance
   // fields.
-  VisitStaticFieldsReferences<kVisitClass>(this, visitor);
   VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  if (!IsTemp()) {
+    // Temp classes don't ever populate imt/vtable or static fields and they are not even
+    // allocated with the right size for those.
+    VisitStaticFieldsReferences<kVisitClass>(this, visitor);
+    if (ShouldHaveEmbeddedImtAndVTable()) {
+      VisitEmbeddedImtAndVTable(visitor);
+    }
+  }
+}
+
+template<typename Visitor>
+inline void Class::VisitEmbeddedImtAndVTable(const Visitor& visitor) {
+  uint32_t pos = sizeof(mirror::Class);
+
+  size_t count = kImtSize;
+  for (size_t i = 0; i < count; ++i) {
+    MemberOffset offset = MemberOffset(pos);
+    visitor(this, offset, true);
+    pos += sizeof(ImTableEntry);
+  }
+
+  count = ((GetVTable() != NULL) ? GetVTable()->GetLength() : 0);
+  for (size_t i = 0; i < count; ++i) {
+    MemberOffset offset = MemberOffset(pos);
+    visitor(this, offset, true);
+    pos += sizeof(VTableEntry);
+  }
 }
 
 template<ReadBarrierOption kReadBarrierOption>
@@ -521,6 +592,11 @@
   return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsReferenceClass() const {
+  return this == Reference::GetJavaLangRefReference<kReadBarrierOption>();
+}
+
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsClassClass() {
   Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
@@ -546,6 +622,68 @@
   }
 }
 
+inline void Class::AssertInitializedOrInitializingInThread(Thread* self) {
+  if (kIsDebugBuild && !IsInitialized()) {
+    CHECK(IsInitializing()) << PrettyClass(this) << " is not initializing: " << GetStatus();
+    CHECK_EQ(GetClinitThreadId(), self->GetTid()) << PrettyClass(this)
+                                                  << " is initializing in a different thread";
+  }
+}
+
+inline ObjectArray<Class>* Class::GetInterfaces() {
+  CHECK(IsProxyClass());
+  // First static field.
+  DCHECK(GetSFields()->Get(0)->IsArtField());
+  DCHECK_STREQ(GetSFields()->Get(0)->GetName(), "interfaces");
+  MemberOffset field_offset = GetSFields()->Get(0)->GetOffset();
+  return GetFieldObject<ObjectArray<Class>>(field_offset);
+}
+
+inline ObjectArray<ObjectArray<Class>>* Class::GetThrows() {
+  CHECK(IsProxyClass());
+  // Second static field.
+  DCHECK(GetSFields()->Get(1)->IsArtField());
+  DCHECK_STREQ(GetSFields()->Get(1)->GetName(), "throws");
+  MemberOffset field_offset = GetSFields()->Get(1)->GetOffset();
+  return GetFieldObject<ObjectArray<ObjectArray<Class>>>(field_offset);
+}
+
+inline MemberOffset Class::GetDisableIntrinsicFlagOffset() {
+  CHECK(IsReferenceClass());
+  // First static field
+  DCHECK(GetSFields()->Get(0)->IsArtField());
+  DCHECK_STREQ(GetSFields()->Get(0)->GetName(), "disableIntrinsic");
+  return GetSFields()->Get(0)->GetOffset();
+}
+
+inline MemberOffset Class::GetSlowPathFlagOffset() {
+  CHECK(IsReferenceClass());
+  // Second static field
+  DCHECK(GetSFields()->Get(1)->IsArtField());
+  DCHECK_STREQ(GetSFields()->Get(1)->GetName(), "slowPathEnabled");
+  return GetSFields()->Get(1)->GetOffset();
+}
+
+inline bool Class::GetSlowPathEnabled() {
+  return GetField32(GetSlowPathFlagOffset());
+}
+
+inline void Class::SetSlowPath(bool enabled) {
+  SetField32<false>(GetSlowPathFlagOffset(), enabled);
+}
+
+inline void Class::InitializeClassVisitor::operator()(
+    mirror::Object* obj, size_t usable_size) const {
+  DCHECK_LE(class_size_, usable_size);
+  // Avoid AsClass as object is not yet in live bitmap or allocation stack.
+  mirror::Class* klass = down_cast<mirror::Class*>(obj);
+  // DCHECK(klass->IsClass());
+  klass->SetClassSize(class_size_);
+  klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
+  klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
+  klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
+}
+
 }  // namespace mirror
 }  // namespace art
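
Spelling out the ComputeClassSize arithmetic above under the assumptions visible in this
change (ImTableEntry and VTableEntry each wrap a single 4-byte HeapReference, and kImtSize is
64), a class with embedded tables and no static fields costs:

    // Hedged re-derivation; the value 11 for Object::kVTableLength is purely
    // illustrative and not taken from this diff.
    constexpr uint32_t kImtSize = 64;
    constexpr uint32_t EmbeddedTablesSize(uint32_t num_vtable_entries) {
      return kImtSize * 4u + num_vtable_entries * 4u;  // imt entries + vtable entries
    }
    // Array::ClassSize() == sizeof(Class) + EmbeddedTablesSize(Object::kVTableLength):
    // arrays add no virtual methods, and with zero static fields both the
    // 64-bit-alignment pad and the static-field terms vanish.
    static_assert(EmbeddedTablesSize(11) == 300, "64 * 4 + 11 * 4");
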
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index c6472c6..fadf80e 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -18,17 +18,16 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "class-inl.h"
 #include "class_linker.h"
 #include "class_loader.h"
+#include "class-inl.h"
 #include "dex_cache.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
-#include "object-inl.h"
-#include "object_array-inl.h"
-#include "object_utils.h"
-#include "runtime.h"
 #include "handle_scope-inl.h"
+#include "object_array-inl.h"
+#include "object-inl.h"
+#include "runtime.h"
 #include "thread.h"
 #include "throwable.h"
 #include "utils.h"
@@ -63,7 +62,8 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool class_linker_initialized = class_linker != nullptr && class_linker->IsInitialized();
   if (LIKELY(class_linker_initialized)) {
-    if (UNLIKELY(new_status <= old_status && new_status != kStatusError)) {
+    if (UNLIKELY(new_status <= old_status && new_status != kStatusError &&
+                 new_status != kStatusRetired)) {
       LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(this) << " "
           << old_status << " -> " << new_status;
     }
@@ -87,18 +87,22 @@
     Handle<mirror::ArtMethod> old_throw_method(hs.NewHandle(old_throw_location.GetMethod()));
     uint32_t old_throw_dex_pc = old_throw_location.GetDexPc();
     bool is_exception_reported = self->IsExceptionReportedToInstrumentation();
-    // clear exception to call FindSystemClass
-    self->ClearException();
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    Class* eiie_class = class_linker->FindSystemClass(self,
-                                                      "Ljava/lang/ExceptionInInitializerError;");
-    CHECK(!self->IsExceptionPending());
-
-    // Only verification errors, not initialization problems, should set a verify error.
-    // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that case.
-    Class* exception_class = old_exception->GetClass();
-    if (!eiie_class->IsAssignableFrom(exception_class)) {
-      SetVerifyErrorClass(exception_class);
+    Class* eiie_class;
+    // Don't attempt to use FindClass if we have an OOM error, since this can try to do more
+    // allocations and may cause infinite loops.
+    if (old_exception.Get() == nullptr ||
+        old_exception->GetClass()->GetDescriptor() != "Ljava/lang/OutOfMemoryError;") {
+      // Clear exception to call FindSystemClass.
+      self->ClearException();
+      eiie_class = Runtime::Current()->GetClassLinker()->FindSystemClass(
+          self, "Ljava/lang/ExceptionInInitializerError;");
+      CHECK(!self->IsExceptionPending());
+      // Only verification errors, not initialization problems, should set a verify error.
+      // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that case.
+      Class* exception_class = old_exception->GetClass();
+      if (!eiie_class->IsAssignableFrom(exception_class)) {
+        SetVerifyErrorClass(exception_class);
+      }
     }
 
     // Restore exception.
@@ -113,11 +117,27 @@
   } else {
     SetField32Volatile<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
   }
-  // Classes that are being resolved or initialized need to notify waiters that the class status
-  // changed. See ClassLinker::EnsureResolved and ClassLinker::WaitForInitializeClass.
-  if ((old_status >= kStatusResolved || new_status >= kStatusResolved) &&
-      class_linker_initialized) {
-    NotifyAll(self);
+
+  if (!class_linker_initialized) {
+    // When the class linker is being initialized, it's single-threaded and by definition there
+    // can be no waiters. During initialization, classes may appear temporary but won't be
+    // retired, as their size was statically computed.
+  } else {
+    // Classes that are being resolved or initialized need to notify waiters that the class status
+    // changed. See ClassLinker::EnsureResolved and ClassLinker::WaitForInitializeClass.
+    if (IsTemp()) {
+      // Class is a temporary one, ensure that waiters for resolution get notified of retirement
+      // so that they can grab the new version of the class from the class linker's table.
+      CHECK_LT(new_status, kStatusResolved) << PrettyDescriptor(this);
+      if (new_status == kStatusRetired || new_status == kStatusError) {
+        NotifyAll(self);
+      }
+    } else {
+      CHECK_NE(new_status, kStatusRetired);
+      if (old_status >= kStatusResolved || new_status >= kStatusResolved) {
+        NotifyAll(self);
+      }
+    }
   }
 }
 
@@ -217,35 +237,39 @@
       os << StringPrintf("    %2zd: %s (cl=%p)\n", i, PrettyClass(interface).c_str(), cl);
     }
   }
-  // After this point, this may have moved due to GetDirectInterface.
-  os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
-     << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n";
-  for (size_t i = 0; i < NumVirtualMethods(); ++i) {
-    os << StringPrintf("    %2zd: %s\n", i,
-                       PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str());
-  }
-  os << "  direct methods (" << h_this->NumDirectMethods() << " entries):\n";
-  for (size_t i = 0; i < h_this->NumDirectMethods(); ++i) {
-    os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(h_this->GetDirectMethod(i)).c_str());
-  }
-  if (h_this->NumStaticFields() > 0) {
-    os << "  static fields (" << h_this->NumStaticFields() << " entries):\n";
-    if (h_this->IsResolved() || h_this->IsErroneous()) {
-      for (size_t i = 0; i < h_this->NumStaticFields(); ++i) {
-        os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetStaticField(i)).c_str());
-      }
-    } else {
-      os << "    <not yet available>";
+  if (!IsLoaded()) {
+    os << "  class not yet loaded";
+  } else {
+    // After this point, this may have moved due to GetDirectInterface.
+    os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
+        << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n";
+    for (size_t i = 0; i < NumVirtualMethods(); ++i) {
+      os << StringPrintf("    %2zd: %s\n", i,
+                         PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str());
     }
-  }
-  if (h_this->NumInstanceFields() > 0) {
-    os << "  instance fields (" << h_this->NumInstanceFields() << " entries):\n";
-    if (h_this->IsResolved() || h_this->IsErroneous()) {
-      for (size_t i = 0; i < h_this->NumInstanceFields(); ++i) {
-        os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetInstanceField(i)).c_str());
+    os << "  direct methods (" << h_this->NumDirectMethods() << " entries):\n";
+    for (size_t i = 0; i < h_this->NumDirectMethods(); ++i) {
+      os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(h_this->GetDirectMethod(i)).c_str());
+    }
+    if (h_this->NumStaticFields() > 0) {
+      os << "  static fields (" << h_this->NumStaticFields() << " entries):\n";
+      if (h_this->IsResolved() || h_this->IsErroneous()) {
+        for (size_t i = 0; i < h_this->NumStaticFields(); ++i) {
+          os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetStaticField(i)).c_str());
+        }
+      } else {
+        os << "    <not yet available>";
       }
-    } else {
-      os << "    <not yet available>";
+    }
+    if (h_this->NumInstanceFields() > 0) {
+      os << "  instance fields (" << h_this->NumInstanceFields() << " entries):\n";
+      if (h_this->IsResolved() || h_this->IsErroneous()) {
+        for (size_t i = 0; i < h_this->NumInstanceFields(); ++i) {
+          os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetInstanceField(i)).c_str());
+        }
+      } else {
+        os << "    <not yet available>";
+      }
     }
   }
 }
@@ -721,9 +745,7 @@
   } else if (IsArrayClass()) {
     return 2;
   } else if (IsProxyClass()) {
-    mirror::SynthesizedProxyClass* proxy_class=
-        reinterpret_cast<mirror::SynthesizedProxyClass*>(this);
-    mirror::ObjectArray<mirror::Class>* interfaces = proxy_class->GetInterfaces();
+    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfaces();
     return interfaces != nullptr ? interfaces->GetLength() : 0;
   } else {
     const DexFile::TypeList* interfaces = GetInterfaceTypeList();
@@ -753,9 +775,7 @@
       return class_linker->FindSystemClass(self, "Ljava/io/Serializable;");
     }
   } else if (klass->IsProxyClass()) {
-    mirror::SynthesizedProxyClass* proxy_class =
-        reinterpret_cast<mirror::SynthesizedProxyClass*>(klass.Get());
-    mirror::ObjectArray<mirror::Class>* interfaces = proxy_class->GetInterfaces();
+    mirror::ObjectArray<mirror::Class>* interfaces = klass.Get()->GetInterfaces();
     DCHECK(interfaces != nullptr);
     return interfaces->Get(idx);
   } else {
@@ -798,5 +818,49 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
+void Class::PopulateEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ObjectArray<ArtMethod>* table = GetImTable();
+  if (table != nullptr) {
+    for (uint32_t i = 0; i < kImtSize; i++) {
+      SetEmbeddedImTableEntry(i, table->Get(i));
+    }
+  }
+
+  table = GetVTableDuringLinking();
+  CHECK(table != nullptr);
+  for (int32_t i = 0; i < table->GetLength(); i++) {
+    SetEmbeddedVTableEntry(i, table->Get(i));
+  }
+}
+
+Class* Class::CopyOf(Thread* self, int32_t new_length) {
+  DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
+  // We may get copied by a compacting GC.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> h_this(hs.NewHandle(this));
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  InitializeClassVisitor visitor(new_length);
+
+  mirror::Object* new_class =
+      kMovingClasses ? heap->AllocObject<true>(self, java_lang_Class_, new_length, visitor)
+                     : heap->AllocNonMovableObject<true>(self, java_lang_Class_, new_length, visitor);
+  if (UNLIKELY(new_class == nullptr)) {
+    CHECK(self->IsExceptionPending());  // Expect an OOME.
+    return NULL;
+  }
+
+  mirror::Class* new_class_obj = new_class->AsClass();
+  memcpy(new_class_obj, h_this.Get(), sizeof(Class));
+
+  new_class_obj->SetStatus(kStatusResolving, self);
+  new_class_obj->PopulateEmbeddedImtAndVTable();
+  // Correct some fields.
+  new_class_obj->SetLockWord(LockWord(), false);
+  new_class_obj->SetClassSize(new_length);
+
+  Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(new_class_obj);
+  return new_class_obj;
+}
+
 }  // namespace mirror
 }  // namespace art
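
Taken together, PopulateEmbeddedImtAndVTable and CopyOf above implement the retire-and-replace
step that the new kStatusResolving/kStatusRetired states exist for. A sketch of the flow from
the linker's point of view (hypothetical caller; the real sequencing lives in ClassLinker):

    // temp_class was allocated at a statically computed size and is IsTemp().
    uint32_t full_size = mirror::Class::ComputeClassSize(
        true, vtable_len, num_32bit_statics, num_64bit_statics, num_ref_statics);
    mirror::Class* klass = temp_class->CopyOf(self, full_size);  // embeds imt/vtable
    if (klass == nullptr) {
      DCHECK(self->IsExceptionPending());  // OOME from the copy allocation
      return nullptr;
    }
    // Retiring the temp class notifies waiters (see SetStatus above), who then
    // re-read the class table and pick up the full-size replacement.
    temp_class->SetStatus(mirror::Class::kStatusRetired, self);
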
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index e735c45..648bdde 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -22,6 +22,7 @@
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
+#include "object_array.h"
 #include "object_callbacks.h"
 #include "primitive.h"
 #include "read_barrier.h"
@@ -62,7 +63,6 @@
 
 namespace art {
 
-struct ClassClassOffsets;
 struct ClassOffsets;
 class Signature;
 class StringPiece;
@@ -70,13 +70,29 @@
 namespace mirror {
 
 class ArtField;
+class ArtMethod;
 class ClassLoader;
 class DexCache;
 class IfTable;
 
 // C++ mirror of java.lang.Class
-class MANAGED Class : public Object {
+class MANAGED Class FINAL : public Object {
  public:
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of classes that implement
+  // (non-marker) interfaces.
+  static constexpr size_t kImtSize = 64;
+
+  // imtable entry embedded in class object.
+  struct MANAGED ImTableEntry {
+    HeapReference<ArtMethod> method;
+  };
+
+  // vtable entry embedded in class object.
+  struct MANAGED VTableEntry {
+    HeapReference<ArtMethod> method;
+  };
+
   // Class Status
   //
   // kStatusNotReady: If a Class cannot be found in the class table by
@@ -95,6 +111,11 @@
   // using ResolveClass to initialize the super_class_ and ensuring the
   // interfaces are resolved.
   //
+  // kStatusResolving: Class has just been cloned, with the right size, from
+  // a temporary class that's acting as a placeholder for linking. The old
+  // class will be retired. The new class is set to this status first, before
+  // moving on to being resolved.
+  //
   // kStatusResolved: Still holding the lock on Class, the ClassLinker
   // shows linking is complete and fields of the Class populated by making
   // it kStatusResolved. Java allows circularities of the form where a super
@@ -109,18 +130,20 @@
   //
   // TODO: Explain the other states
   enum Status {
+    kStatusRetired = -2,
     kStatusError = -1,
     kStatusNotReady = 0,
     kStatusIdx = 1,  // Loaded, DEX idx in super_class_type_idx_ and interfaces_type_idx_.
     kStatusLoaded = 2,  // DEX idx values resolved.
-    kStatusResolved = 3,  // Part of linking.
-    kStatusVerifying = 4,  // In the process of being verified.
-    kStatusRetryVerificationAtRuntime = 5,  // Compile time verification failed, retry at runtime.
-    kStatusVerifyingAtRuntime = 6,  // Retrying verification at runtime.
-    kStatusVerified = 7,  // Logically part of linking; done pre-init.
-    kStatusInitializing = 8,  // Class init in progress.
-    kStatusInitialized = 9,  // Ready to go.
-    kStatusMax = 10,
+    kStatusResolving = 3,  // Just cloned from temporary class object.
+    kStatusResolved = 4,  // Part of linking.
+    kStatusVerifying = 5,  // In the process of being verified.
+    kStatusRetryVerificationAtRuntime = 6,  // Compile time verification failed, retry at runtime.
+    kStatusVerifyingAtRuntime = 7,  // Retrying verification at runtime.
+    kStatusVerified = 8,  // Logically part of linking; done pre-init.
+    kStatusInitializing = 9,  // Class init in progress.
+    kStatusInitialized = 10,  // Ready to go.
+    kStatusMax = 11,
   };
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -136,6 +159,12 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, status_);
   }
 
+  // Returns true if the class has been retired.
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool IsRetired() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetStatus<kVerifyFlags>() == kStatusRetired;
+  }
+
   // Returns true if the class has failed to link.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsErroneous() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -232,7 +261,7 @@
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool IsReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsTypeOfReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags<kVerifyFlags>() & kAccClassIsReference) != 0;
   }
 
@@ -274,6 +303,13 @@
     }
   }
 
+  // Returns true if this class is a temporary placeholder that should be
+  // retired and replaced with a class of the correct size, with space for the
+  // embedded imt/vtable.
+  bool IsTemp() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Status s = GetStatus();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
+  }
+
   String* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Returns the cached name.
   void SetName(String* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Sets the cached name.
   // Computes the name, then sets the cached value.
@@ -383,6 +419,9 @@
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethodClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool IsReferenceClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
   }
@@ -451,6 +490,25 @@
   void SetClassSize(uint32_t new_class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Compute how many bytes would be used by a class with the given elements.
+  static uint32_t ComputeClassSize(bool has_embedded_tables,
+                                   uint32_t num_vtable_entries,
+                                   uint32_t num_32bit_static_fields,
+                                   uint32_t num_64bit_static_fields,
+                                   uint32_t num_ref_static_fields);
+
+  // The size of java.lang.Class.class.
+  static uint32_t ClassClassSize() {
+    // The number of vtable entries in java.lang.Class.
+    uint32_t vtable_entries = Object::kVTableLength + 64;
+    return ComputeClassSize(true, vtable_entries, 0, 1, 0);
+  }
+
+  // The size of a java.lang.Class representing a primitive such as int.class.
+  static uint32_t PrimitiveClassSize() {
+    return ComputeClassSize(false, 0, 0, 0, 0);
+  }
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -623,8 +681,6 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, vtable_);
   }
 
-  ObjectArray<ArtMethod>* GetImTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void SetImTable(ObjectArray<ArtMethod>* new_imtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -632,6 +688,26 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, imtable_);
   }
 
+  static MemberOffset EmbeddedImTableOffset() {
+    return MemberOffset(sizeof(Class));
+  }
+
+  static MemberOffset EmbeddedVTableOffset() {
+    return MemberOffset(sizeof(Class) + kImtSize * sizeof(mirror::Class::ImTableEntry));
+  }
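+
+  // Object layout sketch for an instantiable class (derived from the offsets
+  // above and the variable-length fields at the end of this class):
+  //   [Class fields][embedded imt, kImtSize entries][embedded vtable][static fields]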
+
+  bool ShouldHaveEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsInstantiable();
+  }
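+  // Note: interfaces, primitives and abstract (non-array) classes are not
+  // instantiable, so they keep the pointer-based imtable_/vtable_ fields
+  // instead of embedded copies.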
+
+  ArtMethod* GetEmbeddedImTableEntry(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void PopulateEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
   ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method)
@@ -739,11 +815,6 @@
   void SetReferenceInstanceOffsets(uint32_t new_reference_offsets)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Beginning of static field data
-  static MemberOffset FieldsOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(Class, fields_);
-  }
-
   // Returns the number of static fields containing reference types.
   uint32_t NumReferenceStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
@@ -751,7 +822,7 @@
   }
 
   uint32_t NumReferenceStaticFieldsDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(IsLoaded() || IsErroneous());
+    DCHECK(IsLoaded() || IsErroneous() || IsRetired());
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_));
   }
 
@@ -865,21 +936,71 @@
 
   template <bool kVisitClass, typename Visitor>
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Visit references within the embedded tables of the class.
+  // TODO: remove NO_THREAD_SAFETY_ANALYSIS when annotalysis handles visitors better.
+  template<typename Visitor>
+  void VisitEmbeddedImtAndVTable(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS;
 
   std::string GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool DescriptorEquals(const char* match) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   std::string GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::ClassDef* GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static mirror::Class* GetDirectInterface(Thread* self, Handle<mirror::Class> klass, uint32_t idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const char* GetSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   std::string GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   const DexFile::TypeList* GetInterfaceTypeList() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Asserts we are initialized or initializing in the given thread.
+  void AssertInitializedOrInitializingInThread(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  Class* CopyOf(Thread* self, int32_t new_length)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // For proxy class only.
+  ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // For proxy class only.
+  ObjectArray<ObjectArray<Class>>* GetThrows() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // For reference class only.
+  MemberOffset GetDisableIntrinsicFlagOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  MemberOffset GetSlowPathFlagOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool GetSlowPathEnabled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetSlowPath(bool enabled) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
+  // fence.
+  class InitializeClassVisitor {
+   public:
+    explicit InitializeClassVisitor(uint32_t class_size) : class_size_(class_size) {
+    }
+
+    void operator()(mirror::Object* obj, size_t usable_size) const
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+   private:
+    const uint32_t class_size_;
+
+    DISALLOW_COPY_AND_ASSIGN(InitializeClassVisitor);
+  };
+
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -898,6 +1019,8 @@
 
   void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ObjectArray<ArtMethod>* GetImTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // defining class loader, or NULL for the "bootstrap" system loader
   HeapReference<ClassLoader> class_loader_;
 
@@ -1008,7 +1131,12 @@
   // values are kept in a table in gDvm.
   // InitiatingLoaderList initiating_loader_list_;
 
-  // Location of first static field.
+  // The following data exists in real class objects.
+  // Embedded imtable, for a class object that is not an interface; fixed size.
+  ImTableEntry embedded_imtable_[0];
+  // Embedded vtable, for a class object that is not an interface; variable size.
+  VTableEntry embedded_vtable_[0];
+  // Static fields, variable size.
   uint32_t fields_[0];
 
   // java.lang.Class
@@ -1020,14 +1148,6 @@
 
 std::ostream& operator<<(std::ostream& os, const Class::Status& rhs);
 
-class MANAGED ClassClass : public Class {
- private:
-  int32_t pad_;
-  int64_t serialVersionUID_;
-  friend struct art::ClassClassOffsets;  // for verifying offset information
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ClassClass);
-};
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 74dae38..f3594e4 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -27,6 +27,12 @@
 
 // C++ mirror of java.lang.ClassLoader
 class MANAGED ClassLoader : public Object {
+ public:
+  // Size of an instance of java.lang.ClassLoader.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ClassLoader);
+  }
+
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Object> packages_;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 7e40f64..08cff99 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -24,6 +24,11 @@
 namespace art {
 namespace mirror {
 
+inline uint32_t DexCache::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength + 1;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0);
+}
+
 inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ArtMethod* method = GetResolvedMethods()->Get(method_idx);
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 65a5026..bfd603a 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -18,10 +18,8 @@
 #define ART_RUNTIME_MIRROR_DEX_CACHE_H_
 
 #include "art_method.h"
-#include "class.h"
 #include "object.h"
 #include "object_array.h"
-#include "string.h"
 
 namespace art {
 
@@ -33,15 +31,21 @@
 namespace mirror {
 
 class ArtField;
+class ArtMethod;
 class Class;
+class String;
 
-class MANAGED DexCacheClass : public Class {
- private:
-  DISALLOW_IMPLICIT_CONSTRUCTORS(DexCacheClass);
-};
-
-class MANAGED DexCache : public Object {
+// C++ mirror of java.lang.DexCache.
+class MANAGED DexCache FINAL : public Object {
  public:
+  // Size of java.lang.DexCache.class.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.DexCache not including referenced values.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(DexCache);
+  }
+
   void Init(const DexFile* dex_file,
             String* location,
             ObjectArray<String>* strings,
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 3d28dc6..ef6fc67 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -24,6 +24,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index ad312ed..5feb602 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -23,7 +23,7 @@
 namespace art {
 namespace mirror {
 
-class MANAGED IfTable : public ObjectArray<Object> {
+class MANAGED IfTable FINAL : public ObjectArray<Object> {
  public:
   Class* GetInterface(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Class* interface = Get((i * kMax) + kInterface)->AsClass();
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 089ef57..9dbfb56 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -35,6 +35,11 @@
 namespace art {
 namespace mirror {
 
+inline uint32_t Object::ClassSize() {
+  uint32_t vtable_entries = kVTableLength;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0);
+}
+
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Object::GetClass() {
   return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(
@@ -75,6 +80,12 @@
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
+inline bool Object::CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val) {
+  // Force use of non-transactional mode and do not check.
+  return CasFieldWeakRelaxed32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
+}
+
 inline uint32_t Object::GetLockOwnerThreadId() {
   return Monitor::GetLockOwnerThreadId(this);
 }
@@ -238,7 +249,7 @@
 
 template<VerifyObjectFlags kVerifyFlags>
 inline bool Object::IsReferenceInstance() {
-  return GetClass<kVerifyFlags>()->IsReferenceClass();
+  return GetClass<kVerifyFlags>()->IsTypeOfReferenceClass();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -443,6 +454,8 @@
   SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags, true>(field_offset, new_value);
 }
 
+// TODO: Pass memory_order_ and strong/weak as arguments to avoid code duplication?
+
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset,
                                                          int32_t old_value, int32_t new_value) {
@@ -461,6 +474,42 @@
   return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value);
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelaxed32(MemberOffset field_offset,
+                                          int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+
+  return atomic_addr->CompareExchangeWeakRelaxed(old_value, new_value);
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset,
+                                                           int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+
+  return atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_value, new_value);
+}
+
 template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
 inline int64_t Object::GetField64(MemberOffset field_offset) {
   if (kVerifyFlags & kVerifyThis) {
@@ -526,6 +575,23 @@
   return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value);
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongSequentiallyConsistent64(MemberOffset field_offset,
+                                                           int64_t old_value, int64_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField64(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  Atomic<int64_t>* atomic_addr = reinterpret_cast<Atomic<int64_t>*>(raw_addr);
+  return atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_value, new_value);
+}
+
 template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption,
          bool kIsVolatile>
 inline T* Object::GetFieldObject(MemberOffset field_offset) {
@@ -644,6 +710,38 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset,
+                                                             Object* old_value, Object* new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_,
+                                                                          new_ref.reference_);
+
+  if (success) {
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+  return success;
+}
+
 template<bool kVisitClass, bool kIsStatic, typename Visitor>
 inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
   if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
@@ -687,6 +785,7 @@
 
 template<bool kVisitClass, typename Visitor>
 inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
+  DCHECK(!klass->IsTemp());
   klass->VisitFieldsReferences<kVisitClass, true>(
       klass->GetReferenceStaticOffsets<kVerifyNone>(), visitor);
 }
@@ -707,7 +806,7 @@
   } else {
     DCHECK(!klass->IsVariableSize());
     VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
-    if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
+    if (UNLIKELY(klass->IsTypeOfReferenceClass<kVerifyNone>())) {
       ref_visitor(klass, AsReference());
     }
   }
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index e58091f..961bc64 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -24,13 +24,13 @@
 #include "class.h"
 #include "class-inl.h"
 #include "class_linker-inl.h"
+#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "iftable-inl.h"
 #include "monitor.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
 #include "throwable.h"
@@ -57,7 +57,7 @@
       ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Copy java.lang.ref.Reference.referent which isn't visited in
     // Object::VisitReferences().
-    DCHECK(klass->IsReferenceClass());
+    DCHECK(klass->IsTypeOfReferenceClass());
     this->operator()(ref, mirror::Reference::ReferentOffset(), false);
   }
 
@@ -156,7 +156,7 @@
         // loop iteration.
         LockWord hash_word(LockWord::FromHashCode(GenerateIdentityHashCode()));
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
-        if (const_cast<Object*>(this)->CasLockWordWeakSequentiallyConsistent(lw, hash_word)) {
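+        // A relaxed CAS suffices here (our reasoning): the hash code lives in
+        // the lock word itself, so the CAS that installs it also publishes it;
+        // no other writes need to be ordered with it.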
+        if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
         }
         break;
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index d29011a..4fae470 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -63,13 +63,24 @@
 // C++ mirror of java.lang.Object
 class MANAGED LOCKABLE Object {
  public:
+  // The number of vtable entries in java.lang.Object.
+  static constexpr size_t kVTableLength = 11;
+
+  // The size of the java.lang.Class representing a java.lang.Object.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.Object.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(Object);
+  }
+
   static MemberOffset ClassOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  Class* GetClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE Class* GetClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(Class* new_klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -99,19 +110,14 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
-  // As volatile can be false if the mutators are suspended. This is an optimization since it
+  // As_volatile can be false if the mutators are suspended. This is an optimization since it
   // avoids the barriers.
   LockWord GetLockWord(bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetLockWord(LockWord new_val, bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  // All Cas operations defined here have C++11 memory_order_seq_cst ordering
-  // semantics: Preceding memory operations become visible to other threads
-  // before the CAS, and subsequent operations become visible after the CAS.
-  // The Cas operations defined here do not fail spuriously, i.e. they
-  // have C++11 "strong" semantics.
-  // TODO: In most, possibly all, cases, these assumptions are too strong.
-  // Confirm and weaken the implementation.
   bool CasLockWordWeakSequentiallyConsistent(LockWord old_val, LockWord new_val)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
   mirror::Object* MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -202,27 +208,27 @@
   // Accessor for Java type fields.
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
       ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kIsVolatile = false>
-  T* GetFieldObject(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE T* GetFieldObject(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
       ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  T* GetFieldObjectVolatile(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE T* GetFieldObjectVolatile(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value)
-      ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  void SetFieldObject(MemberOffset field_offset, Object* new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetFieldObject(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetFieldObjectVolatile(MemberOffset field_offset, Object* new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetFieldObjectVolatile(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
@@ -231,49 +237,67 @@
                                                 Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
+                                                  Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  int32_t GetField32(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE int32_t GetField32(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  int32_t GetField32Volatile(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE int32_t GetField32Volatile(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  void SetField32(MemberOffset field_offset, int32_t new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetField32(MemberOffset field_offset, int32_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetField32Volatile(MemberOffset field_offset, int32_t new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetField32Volatile(MemberOffset field_offset, int32_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value,
-                                            int32_t new_value) ALWAYS_INLINE
+  ALWAYS_INLINE bool CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset,
+                                                          int32_t old_value, int32_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakRelaxed32(MemberOffset field_offset, int32_t old_value,
+                             int32_t new_value) ALWAYS_INLINE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value,
+                                              int32_t new_value) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  int64_t GetField64(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE int64_t GetField64(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  int64_t GetField64Volatile(MemberOffset field_offset) ALWAYS_INLINE
+  ALWAYS_INLINE int64_t GetField64Volatile(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  void SetField64(MemberOffset field_offset, int64_t new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetField64(MemberOffset field_offset, int64_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetField64Volatile(MemberOffset field_offset, int64_t new_value) ALWAYS_INLINE
+  ALWAYS_INLINE void SetField64Volatile(MemberOffset field_offset, int64_t new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
@@ -283,6 +307,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongSequentiallyConsistent64(MemberOffset field_offset, int64_t old_value,
+                                              int64_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
   void SetFieldPtr(MemberOffset field_offset, T new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 54d1240..7012b19 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -23,8 +23,13 @@
 namespace mirror {
 
 template<class T>
-class MANAGED ObjectArray : public Array {
+class MANAGED ObjectArray : public Array {
  public:
+  // The size of Object[].class.
+  static uint32_t ClassSize() {
+    return Array::ClassSize();
+  }
+
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length,
                                gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index f85fb27..a7ea6c9 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -28,7 +28,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "iftable-inl.h"
@@ -36,6 +36,7 @@
 #include "object-inl.h"
 #include "object_array-inl.h"
 #include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
 #include "string-inl.h"
 
 namespace art {
diff --git a/runtime/mirror/proxy.h b/runtime/mirror/proxy.h
index 6e4947e..db511d6 100644
--- a/runtime/mirror/proxy.h
+++ b/runtime/mirror/proxy.h
@@ -25,28 +25,8 @@
 
 namespace mirror {
 
-// All proxy objects have a class which is a synthesized proxy class. The synthesized proxy class
-// has the static fields used to implement reflection on proxy objects.
-class MANAGED SynthesizedProxyClass : public Class {
- public:
-  ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<Class>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
-                                                                       interfaces_));
-  }
-
-  ObjectArray<ObjectArray<Class>>* GetThrows()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ObjectArray<Class>>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
-                                                                                     throws_));
-  }
-
- private:
-  HeapReference<ObjectArray<Class>> interfaces_;
-  HeapReference<ObjectArray<ObjectArray<Class>>> throws_;
-  DISALLOW_IMPLICIT_CONSTRUCTORS(SynthesizedProxyClass);
-};
-
 // C++ mirror of java.lang.reflect.Proxy.
-class MANAGED Proxy : public Object {
+class MANAGED Proxy FINAL : public Object {
  private:
   HeapReference<Object> h_;
 
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 43767c8..b353402 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -22,6 +22,11 @@
 namespace art {
 namespace mirror {
 
+inline uint32_t Reference::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength + 5;
+  return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0);
+}
+
 inline bool Reference::IsEnqueuable() {
   // Not using volatile reads as an optimization since this is only called with all the mutators
   // suspended.
diff --git a/runtime/mirror/reference.cc b/runtime/mirror/reference.cc
new file mode 100644
index 0000000..077cd4b
--- /dev/null
+++ b/runtime/mirror/reference.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reference.h"
+
+namespace art {
+namespace mirror {
+
+Class* Reference::java_lang_ref_Reference_ = nullptr;
+
+void Reference::SetClass(Class* java_lang_ref_Reference) {
+  CHECK(java_lang_ref_Reference_ == nullptr);
+  CHECK(java_lang_ref_Reference != nullptr);
+  java_lang_ref_Reference_ = java_lang_ref_Reference;
+}
+
+void Reference::ResetClass() {
+  CHECK(java_lang_ref_Reference_ != nullptr);
+  java_lang_ref_Reference_ = nullptr;
+}
+
+void Reference::VisitRoots(RootCallback* callback, void* arg) {
+  if (java_lang_ref_Reference_ != nullptr) {
+    callback(reinterpret_cast<mirror::Object**>(&java_lang_ref_Reference_),
+             arg, 0, kRootStickyClass);
+  }
+}
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 9c9d87b..07d47d3 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -17,7 +17,11 @@
 #ifndef ART_RUNTIME_MIRROR_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
+#include "class.h"
 #include "object.h"
+#include "object_callbacks.h"
+#include "read_barrier.h"
+#include "thread.h"
 
 namespace art {
 
@@ -36,6 +40,14 @@
 // C++ mirror of java.lang.ref.Reference
 class MANAGED Reference : public Object {
  public:
+  // Size of java.lang.ref.Reference.class.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.ref.Reference.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(Reference);
+  }
+
   static MemberOffset PendingNextOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Reference, pending_next_);
   }
@@ -80,6 +92,16 @@
 
   bool IsEnqueuable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  static Class* GetJavaLangRefReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(java_lang_ref_Reference_ != nullptr);
+    return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(
+        &java_lang_ref_Reference_);
+  }
+  static void SetClass(Class* klass);
+  static void ResetClass();
+  static void VisitRoots(RootCallback* callback, void* arg);
+
  private:
   // Note: This avoids a read barrier, it should only be used by the GC.
   HeapReference<Object>* GetReferentReferenceAddr() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -92,6 +114,8 @@
   HeapReference<Reference> queue_next_;  // Note this is Java volatile:
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
+  static Class* java_lang_ref_Reference_;
+
   friend struct art::ReferenceOffsets;  // for verifying offset information
   friend class gc::ReferenceProcessor;
   friend class gc::ReferenceQueue;
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index abecbc5..52b0927 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -29,7 +29,7 @@
 namespace mirror {
 
 // C++ mirror of java.lang.StackTraceElement
-class MANAGED StackTraceElement : public Object {
+class MANAGED StackTraceElement FINAL : public Object {
  public:
   String* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_));
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 315f7b1..6736497 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_STRING_INL_H_
 
 #include "array.h"
+#include "class.h"
 #include "intern_table.h"
 #include "runtime.h"
 #include "string.h"
@@ -26,6 +27,11 @@
 namespace art {
 namespace mirror {
 
+inline uint32_t String::ClassSize() {
+  uint32_t vtable_entries = Object::kVTableLength + 51;
+  return Class::ComputeClassSize(true, vtable_entries, 1, 1, 2);
+}
+
 inline CharArray* String::GetCharArray() {
   return GetFieldObject<CharArray>(ValueOffset());
 }
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index b8acede..46bdd59 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -19,22 +19,29 @@
 
 #include <gtest/gtest.h>
 
-#include "class.h"
+#include "object.h"
 #include "object_callbacks.h"
 #include "read_barrier.h"
 
 namespace art {
 
 template<class T> class Handle;
-struct StringClassOffsets;
 struct StringOffsets;
 class StringPiece;
 
 namespace mirror {
 
 // C++ mirror of java.lang.String
-class MANAGED String : public Object {
+class MANAGED String FINAL : public Object {
  public:
+  // Size of java.lang.String.class.
+  static uint32_t ClassSize();
+
+  // Size of an instance of java.lang.String not including its value array.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(String);
+  }
+
   static MemberOffset CountOffset() {
     return OFFSET_OF_OBJECT_MEMBER(String, count_);
   }
@@ -160,16 +167,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };
 
-class MANAGED StringClass : public Class {
- private:
-  HeapReference<CharArray> ASCII_;
-  HeapReference<Object> CASE_INSENSITIVE_ORDER_;
-  uint32_t REPLACEMENT_CHAR_;
-  int64_t serialVersionUID_;
-  friend struct art::StringClassOffsets;  // for verifying offset information
-  DISALLOW_IMPLICIT_CONSTRUCTORS(StringClass);
-};
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index 6efc9e2..1c3f1ed 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -23,7 +23,6 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
-#include "object_utils.h"
 #include "stack_trace_element.h"
 #include "utils.h"
 #include "well_known_classes.h"
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 5633a77..4b26eda 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -28,7 +28,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -746,7 +745,11 @@
           contention_count++;
           Runtime* runtime = Runtime::Current();
           if (contention_count <= runtime->GetMaxSpinsBeforeThinkLockInflation()) {
-            NanoSleep(1000);  // Sleep for 1us and re-attempt.
+            // TODO: Consider switching the thread state to kBlocked while we are yielding.
+            // Use sched_yield instead of NanoSleep since NanoSleep can wait much longer than the
+            // parameter you pass in. This can cause thread suspension to take excessively long
+            // and result in long pauses. See b/16307460.
+            sched_yield();
           } else {
             contention_count = 0;
             InflateThinLocked(self, h_obj, lock_word, 0);
diff --git a/runtime/monitor_pool_test.cc b/runtime/monitor_pool_test.cc
index cddc245..e1837f5 100644
--- a/runtime/monitor_pool_test.cc
+++ b/runtime/monitor_pool_test.cc
@@ -17,6 +17,8 @@
 #include "monitor_pool.h"
 
 #include "common_runtime_test.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index bdba494..af24368 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -24,6 +24,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"  // Strings are easiest to allocate
+#include "scoped_thread_state_change.h"
 #include "thread_pool.h"
 #include "utils.h"
 
@@ -31,7 +32,7 @@
 
 class MonitorTest : public CommonRuntimeTest {
  protected:
-  void SetUpRuntimeOptions(Runtime::Options *options) OVERRIDE {
+  void SetUpRuntimeOptions(RuntimeOptions *options) OVERRIDE {
     // Use a smaller heap
     for (std::pair<std::string, const void*>& pair : *options) {
       if (pair.first.find("-Xmx") == 0) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index f1a987f..b0b64aa 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -30,7 +30,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access.h"
 #include "scoped_thread_state_change.h"
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index e619dda..e577c2c 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -21,8 +21,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
-#include "mirror/proxy.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "scoped_fast_native_object_access.h"
 #include "ScopedLocalRef.h"
@@ -91,8 +89,7 @@
 
 static jobjectArray Class_getProxyInterfaces(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::SynthesizedProxyClass* c =
-      down_cast<mirror::SynthesizedProxyClass*>(DecodeClass(soa, javaThis));
+  mirror::Class* c = DecodeClass(soa, javaThis);
   return soa.AddLocalReference<jobjectArray>(c->GetInterfaces()->Clone(soa.Self()));
 }
 
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index eae4584..f94e42b 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -20,7 +20,6 @@
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "scoped_fast_native_object_access.h"
 #include "handle_scope-inl.h"
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 1981bfd..34cb93a 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -20,7 +20,6 @@
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 3564dfdf..3903ffc 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -18,11 +18,11 @@
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
+#include "field_helper.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
-#include "object_utils.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access.h"
 
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 22e81e4..f029b16 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -21,8 +21,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/proxy.h"
-#include "object_utils.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access.h"
 #include "well_known_classes.h"
@@ -39,8 +37,7 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtMethod* proxy_method = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
   CHECK(proxy_method->GetDeclaringClass()->IsProxyClass());
-  mirror::SynthesizedProxyClass* proxy_class =
-      down_cast<mirror::SynthesizedProxyClass*>(proxy_method->GetDeclaringClass());
+  mirror::Class* proxy_class = proxy_method->GetDeclaringClass();
   int throws_index = -1;
   size_t num_virt_methods = proxy_class->NumVirtualMethods();
   for (size_t i = 0; i < num_virt_methods; i++) {
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 7cc4cac..65dece0 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -28,8 +28,8 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   // JNI must use non transactional mode.
-  bool success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset),
-                                                                  expectedValue, newValue);
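+  // The strong variant is required here (our gloss): Unsafe.compareAndSwapInt
+  // must not fail spuriously, which the weak CAS is allowed to do.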
+  bool success = obj->CasFieldStrongSequentiallyConsistent32<false>(MemberOffset(offset),
+                                                                    expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -38,8 +38,8 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   // JNI must use non transactional mode.
-  bool success = obj->CasFieldWeakSequentiallyConsistent64<false>(MemberOffset(offset),
-                                                                  expectedValue, newValue);
+  bool success = obj->CasFieldStrongSequentiallyConsistent64<false>(MemberOffset(offset),
+                                                                    expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -50,8 +50,8 @@
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   // JNI must use non transactional mode.
-  bool success = obj->CasFieldWeakSequentiallyConsistentObject<false>(MemberOffset(offset),
-                                                                      expectedValue, newValue);
+  bool success = obj->CasFieldStrongSequentiallyConsistentObject<false>(MemberOffset(offset),
+                                                                        expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
diff --git a/runtime/noop_compiler_callbacks.h b/runtime/noop_compiler_callbacks.h
index 702b2e1..65498de 100644
--- a/runtime/noop_compiler_callbacks.h
+++ b/runtime/noop_compiler_callbacks.h
@@ -25,10 +25,15 @@
  public:
   NoopCompilerCallbacks() {}
   ~NoopCompilerCallbacks() {}
+
   bool MethodVerified(verifier::MethodVerifier* verifier) OVERRIDE {
     return true;
   }
+
   void ClassRejected(ClassReference ref) OVERRIDE {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(NoopCompilerCallbacks);
 };
 
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 857c0a2..1421baf 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -17,15 +17,46 @@
 #include "oat.h"
 #include "utils.h"
 
+#include <string.h>
 #include <zlib.h>
 
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '6', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '7', '\0' };
 
-OatHeader::OatHeader() {
-  memset(this, 0, sizeof(*this));
+static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
+  size_t estimate = 0U;
+  if (variable_data != nullptr) {
+    SafeMap<std::string, std::string>::const_iterator it = variable_data->begin();
+    SafeMap<std::string, std::string>::const_iterator end = variable_data->end();
+    for ( ; it != end; ++it) {
+      estimate += it->first.length() + 1;
+      estimate += it->second.length() + 1;
+    }
+  }
+  return sizeof(OatHeader) + estimate;
+}
+
+OatHeader* OatHeader::Create(InstructionSet instruction_set,
+                             const InstructionSetFeatures& instruction_set_features,
+                             const std::vector<const DexFile*>* dex_files,
+                             uint32_t image_file_location_oat_checksum,
+                             uint32_t image_file_location_oat_data_begin,
+                             const SafeMap<std::string, std::string>* variable_data) {
+  // Estimate size of optional data.
+  size_t needed_size = ComputeOatHeaderSize(variable_data);
+
+  // Reserve enough memory.
+  void* memory = operator new (needed_size);
+
+  // Create the OatHeader in-place.
+  return new (memory) OatHeader(instruction_set,
+                                instruction_set_features,
+                                dex_files,
+                                image_file_location_oat_checksum,
+                                image_file_location_oat_data_begin,
+                                variable_data);
 }
 
 OatHeader::OatHeader(InstructionSet instruction_set,
@@ -33,7 +64,7 @@
                      const std::vector<const DexFile*>* dex_files,
                      uint32_t image_file_location_oat_checksum,
                      uint32_t image_file_location_oat_data_begin,
-                     const std::string& image_file_location) {
+                     const SafeMap<std::string, std::string>* variable_data) {
   memcpy(magic_, kOatMagic, sizeof(kOatMagic));
   memcpy(version_, kOatVersion, sizeof(kOatVersion));
 
@@ -56,9 +87,16 @@
   image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
   UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
 
-  image_file_location_size_ = image_file_location.size();
-  UpdateChecksum(&image_file_location_size_, sizeof(image_file_location_size_));
-  UpdateChecksum(image_file_location.data(), image_file_location_size_);
+  // Flatten the map. This also updates key_value_store_size_.
+  Flatten(variable_data);
+
+  // Update checksum for variable data size.
+  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
+
+  // Update the checksum for the data, if present.
+  if (key_value_store_size_ > 0U) {
+    UpdateChecksum(&key_value_store_, key_value_store_size_);
+  }
 
   executable_offset_ = 0;
   interpreter_to_interpreter_bridge_offset_ = 0;
@@ -327,20 +365,97 @@
   return image_file_location_oat_data_begin_;
 }
 
-uint32_t OatHeader::GetImageFileLocationSize() const {
+uint32_t OatHeader::GetKeyValueStoreSize() const {
   CHECK(IsValid());
-  return image_file_location_size_;
+  return key_value_store_size_;
 }
 
-const uint8_t* OatHeader::GetImageFileLocationData() const {
+const uint8_t* OatHeader::GetKeyValueStore() const {
   CHECK(IsValid());
-  return image_file_location_data_;
+  return key_value_store_;
 }
 
-std::string OatHeader::GetImageFileLocation() const {
-  CHECK(IsValid());
-  return std::string(reinterpret_cast<const char*>(GetImageFileLocationData()),
-                     GetImageFileLocationSize());
+// Advance start until it reaches end or points at a '\0' terminator.
+static const char* ParseString(const char* start, const char* end) {
+  while (start < end && *start != 0) {
+    start++;
+  }
+  return start;
+}
+
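+// The store is a flat sequence of alternating NUL-terminated strings (see
+// Flatten() below):
+//   key1\0value1\0key2\0value2\0 ...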
+const char* OatHeader::GetStoreValueByKey(const char* key) const {
+  const char* ptr = reinterpret_cast<const char*>(&key_value_store_);
+  const char* end = ptr + key_value_store_size_;
+
+  while (ptr < end) {
+    // Scan for a closing zero.
+    const char* str_end = ParseString(ptr, end);
+    if (str_end < end) {
+      if (strcmp(key, ptr) == 0) {
+        // Same as key. Check if value is OK.
+        if (ParseString(str_end + 1, end) < end) {
+          return str_end + 1;
+        }
+      } else {
+        // Different from key. Advance over the value.
+        ptr = ParseString(str_end + 1, end) + 1;
+      }
+    } else {
+      break;
+    }
+  }
+  // Not found.
+  return nullptr;
+}
+
+bool OatHeader::GetStoreKeyValuePairByIndex(size_t index, const char** key,
+                                            const char** value) const {
+  const char* ptr = reinterpret_cast<const char*>(&key_value_store_);
+  const char* end = ptr + key_value_store_size_;
+  ssize_t counter = static_cast<ssize_t>(index);
+
+  while (ptr < end && counter >= 0) {
+    // Scan for a closing zero.
+    const char* str_end = ParseString(ptr, end);
+    if (str_end < end) {
+      const char* maybe_key = ptr;
+      ptr = ParseString(str_end + 1, end) + 1;
+      if (ptr <= end) {
+        if (counter == 0) {
+          *key = maybe_key;
+          *value = str_end + 1;
+          return true;
+        } else {
+          counter--;
+        }
+      } else {
+        return false;
+      }
+    } else {
+      break;
+    }
+  }
+  // Not found.
+  return false;
+}
+
+size_t OatHeader::GetHeaderSize() const {
+  return sizeof(OatHeader) + key_value_store_size_;
+}
+
+void OatHeader::Flatten(const SafeMap<std::string, std::string>* key_value_store) {
+  char* data_ptr = reinterpret_cast<char*>(&key_value_store_);
+  if (key_value_store != nullptr) {
+    SafeMap<std::string, std::string>::const_iterator it = key_value_store->begin();
+    SafeMap<std::string, std::string>::const_iterator end = key_value_store->end();
+    for ( ; it != end; ++it) {
+      strcpy(data_ptr, it->first.c_str());
+      data_ptr += it->first.length() + 1;
+      strcpy(data_ptr, it->second.c_str());
+      data_ptr += it->second.length() + 1;
+    }
+  }
+  key_value_store_size_ = data_ptr - reinterpret_cast<char*>(&key_value_store_);
 }
 
 OatMethodOffsets::OatMethodOffsets()
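The oat.cc changes above replace the fixed image-file-location blob with a generic key-value store: Flatten() serializes the SafeMap as back-to-back NUL-terminated key/value strings, and the lookup routines re-scan that flat region with the bounds-checked ParseString(). A minimal standalone sketch of the same layout, with std::map standing in for SafeMap and illustrative sample data (the real scan is bounds-checked; this toy version trusts its own well-formed buffer):

// Standalone model of the flattened key-value store: keys and values are
// written back to back, each terminated by a NUL byte.
#include <cstring>
#include <iostream>
#include <map>
#include <string>

static std::string Flatten(const std::map<std::string, std::string>& kv) {
  std::string out;
  for (const auto& pair : kv) {
    out += pair.first;
    out += '\0';
    out += pair.second;
    out += '\0';
  }
  return out;
}

static const char* GetValueByKey(const std::string& store, const char* key) {
  const char* ptr = store.data();
  const char* end = ptr + store.size();
  while (ptr < end) {
    const char* value = ptr + std::strlen(ptr) + 1;  // Skip the key and its NUL.
    if (value >= end) {
      break;  // Truncated entry.
    }
    if (std::strcmp(ptr, key) == 0) {
      return value;
    }
    ptr = value + std::strlen(value) + 1;  // Skip the value and its NUL.
  }
  return nullptr;  // Not found.
}

int main() {
  std::map<std::string, std::string> kv = {
      {"dex2oat-host", "x86_64"},
      {"image-location", "/system/framework/boot.art"}};  // Sample data only.
  std::string store = Flatten(kv);
  const char* value = GetValueByKey(store, "image-location");
  std::cout << (value != nullptr ? value : "<missing>") << std::endl;
  return 0;
}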
diff --git a/runtime/oat.h b/runtime/oat.h
index 7be768c..fbed596 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -23,6 +23,7 @@
 #include "dex_file.h"
 #include "instruction_set.h"
 #include "quick/quick_method_frame_info.h"
+#include "safe_map.h"
 
 namespace art {
 
@@ -31,13 +32,16 @@
   static const uint8_t kOatMagic[4];
   static const uint8_t kOatVersion[4];
 
-  OatHeader();
-  OatHeader(InstructionSet instruction_set,
-            const InstructionSetFeatures& instruction_set_features,
-            const std::vector<const DexFile*>* dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_data_begin,
-            const std::string& image_file_location);
+  static constexpr const char* kImageLocationKey = "image-location";
+  static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
+  static constexpr const char* kDex2OatHostKey = "dex2oat-host";
+
+  static OatHeader* Create(InstructionSet instruction_set,
+                           const InstructionSetFeatures& instruction_set_features,
+                           const std::vector<const DexFile*>* dex_files,
+                           uint32_t image_file_location_oat_checksum,
+                           uint32_t image_file_location_oat_data_begin,
+                           const SafeMap<std::string, std::string>* variable_data);
 
   bool IsValid() const;
   const char* GetMagic() const;
@@ -88,11 +92,24 @@
   const InstructionSetFeatures& GetInstructionSetFeatures() const;
   uint32_t GetImageFileLocationOatChecksum() const;
   uint32_t GetImageFileLocationOatDataBegin() const;
-  uint32_t GetImageFileLocationSize() const;
-  const uint8_t* GetImageFileLocationData() const;
-  std::string GetImageFileLocation() const;
+
+  uint32_t GetKeyValueStoreSize() const;
+  const uint8_t* GetKeyValueStore() const;
+  const char* GetStoreValueByKey(const char* key) const;
+  bool GetStoreKeyValuePairByIndex(size_t index, const char** key, const char** value) const;
+
+  size_t GetHeaderSize() const;
 
  private:
+  OatHeader(InstructionSet instruction_set,
+            const InstructionSetFeatures& instruction_set_features,
+            const std::vector<const DexFile*>* dex_files,
+            uint32_t image_file_location_oat_checksum,
+            uint32_t image_file_location_oat_data_begin,
+            const SafeMap<std::string, std::string>* variable_data);
+
+  void Flatten(const SafeMap<std::string, std::string>* variable_data);
+
   uint8_t magic_[4];
   uint8_t version_[4];
   uint32_t adler32_checksum_;
@@ -114,8 +131,9 @@
 
   uint32_t image_file_location_oat_checksum_;
   uint32_t image_file_location_oat_data_begin_;
-  uint32_t image_file_location_size_;
-  uint8_t image_file_location_data_[0];  // note variable width data at end
+
+  uint32_t key_value_store_size_;
+  uint8_t key_value_store_[0];  // note variable width data at end
 
   DISALLOW_COPY_AND_ASSIGN(OatHeader);
 };
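Assuming the accessors behave as declared in this header, a consumer might read the well-known keys as below; the header pointer, logging, and loop scaffolding are assumed context for illustration, not code from this patch:

// Sketch: reading well-known keys from a valid OatHeader* 'header'.
const char* image_location = header->GetStoreValueByKey(OatHeader::kImageLocationKey);
if (image_location != nullptr) {
  LOG(INFO) << "Compiled against image: " << image_location;
}
// Enumerating every stored pair, e.g. for oatdump-style output.
const char* key;
const char* value;
for (size_t index = 0; header->GetStoreKeyValuePairByIndex(index, &key, &value); ++index) {
  LOG(INFO) << key << " = " << value;
}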
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 6c44aa9..86c1bae 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -17,6 +17,7 @@
 #include "oat_file.h"
 
 #include <dlfcn.h>
+#include <sstream>
 
 #include "base/bit_vector.h"
 #include "base/stl_util.h"
@@ -28,6 +29,7 @@
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "os.h"
+#include "runtime.h"
 #include "utils.h"
 #include "vmap_table.h"
 
@@ -55,28 +57,29 @@
                        std::string* error_msg) {
   CHECK(!filename.empty()) << location;
   CheckLocation(filename);
-  if (kUsePortableCompiler) {
+  std::unique_ptr<OatFile> ret;
+  if (kUsePortableCompiler && executable) {
     // If we are using PORTABLE, use dlopen to deal with relocations.
     //
     // We use our own ELF loader for Quick to deal with legacy apps that
     // open a generated dex file by name, remove the file, then open
     // another generated dex file with the same name. http://b/10614658
-    if (executable) {
-      return OpenDlopen(filename, location, requested_base, error_msg);
+    ret.reset(OpenDlopen(filename, location, requested_base, error_msg));
+  } else {
+    // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
+    //
+    // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+    //
+    // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+    // This won't work for portable runtime execution because it doesn't process relocations.
+    std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
+    if (file.get() == nullptr) {
+      *error_msg = StringPrintf("Failed to open oat file for reading: %s", strerror(errno));
+      return nullptr;
     }
+    ret.reset(OpenElfFile(file.get(), location, requested_base, false, executable, error_msg));
   }
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  // This won't work for portable runtime execution because it doesn't process relocations.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file.get() == NULL) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
-    return NULL;
-  }
-  return OpenElfFile(file.get(), location, requested_base, false, executable, error_msg);
+  return ret.release();
 }
 
 OatFile* OatFile::OpenWritable(File* file, const std::string& location, std::string* error_msg) {
@@ -206,11 +209,11 @@
     return false;
   }
 
-  oat += GetOatHeader().GetImageFileLocationSize();
+  oat += GetOatHeader().GetKeyValueStoreSize();
   if (oat > End()) {
-    *error_msg = StringPrintf("In oat file '%s' found truncated image file location: "
+    *error_msg = StringPrintf("In oat file '%s' found truncated variable-size data: "
                               "%p + %zd + %ud <= %p", GetLocation().c_str(),
-                              Begin(), sizeof(OatHeader), GetOatHeader().GetImageFileLocationSize(),
+                              Begin(), sizeof(OatHeader), GetOatHeader().GetKeyValueStoreSize(),
                               End());
     return false;
   }
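OatFile::Open() above now funnels both the dlopen path and the ElfFile path through a single std::unique_ptr, so every early error return stays leak-free and there is exactly one release point. A minimal standalone model of the pattern, with the Resource/OpenA/OpenB names invented for the sketch:

#include <memory>

struct Resource {
  int id;
};

static Resource* OpenA() { return new Resource{1}; }
static Resource* OpenB() { return new Resource{2}; }

// Both branches park the result in one owner; the single release() at the
// end hands ownership to the caller, and any early error return stays
// leak-free automatically.
static Resource* Open(bool use_a) {
  std::unique_ptr<Resource> ret;
  if (use_a) {
    ret.reset(OpenA());
  } else {
    ret.reset(OpenB());
  }
  return ret.release();
}

int main() {
  std::unique_ptr<Resource> r(Open(true));
  return (r != nullptr && r->id == 1) ? 0 : 1;
}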
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 70253af..44f4466 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -24,7 +24,7 @@
 #include "dex_file.h"
 #include "invoke_type.h"
 #include "mem_map.h"
-#include "mirror/art_method.h"
+#include "mirror/class.h"
 #include "oat.h"
 #include "os.h"
 
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index d8c1c40..592deed 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -24,12 +24,14 @@
 // For size_t.
 #include <stdlib.h>
 
+#include "base/macros.h"
+
 namespace art {
 namespace mirror {
-class Class;
-class Object;
-template<class MirrorType> class HeapReference;
-class Reference;
+  class Class;
+  class Object;
+  template<class MirrorType> class HeapReference;
+  class Reference;
 }  // namespace mirror
 class StackVisitor;
 
@@ -57,8 +59,7 @@
 // A callback for visiting an object in the heap.
 typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
 // A callback used for marking an object, returns the new address of the object if the object moved.
-typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg)
-    __attribute__((warn_unused_result));
+typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg) WARN_UNUSED;
 // A callback for verifying roots.
 typedef void (VerifyRootCallback)(const mirror::Object* root, void* arg, size_t vreg,
     const StackVisitor* visitor, RootType root_type);
@@ -68,13 +69,12 @@
 
 // A callback for testing if an object is marked, returns nullptr if not marked, otherwise the new
 // address the object (if the object didn't move, returns the object input parameter).
-typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
-    __attribute__((warn_unused_result));
+typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg) WARN_UNUSED;
 
 // Returns true if the object in the heap reference is marked, if it is marked and has moved the
 // callback updates the heap reference contain the new value.
 typedef bool (IsHeapReferenceMarkedCallback)(mirror::HeapReference<mirror::Object>* object,
-    void* arg) __attribute__((warn_unused_result));
+    void* arg) WARN_UNUSED;
 typedef void (ProcessMarkStackCallback)(void* arg);
 
 }  // namespace art
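WARN_UNUSED is supplied by base/macros.h (hence the new include above) and stands in for the __attribute__((warn_unused_result)) spelled out in the old typedefs. A minimal standalone equivalent, assuming a GCC or Clang toolchain:

// Minimal stand-in for the WARN_UNUSED macro defined in base/macros.h.
#define WARN_UNUSED __attribute__((warn_unused_result))

WARN_UNUSED static int MustCheck() { return 42; }

int main() {
  // MustCheck();  // Uncommented, this line draws -Wunused-result.
  return MustCheck() == 42 ? 0 : 1;
}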
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
new file mode 100644
index 0000000..f7accc0
--- /dev/null
+++ b/runtime/object_lock.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "object_lock.h"
+
+#include "mirror/object-inl.h"
+#include "monitor.h"
+
+namespace art {
+
+template <typename T>
+ObjectLock<T>::ObjectLock(Thread* self, Handle<T> object) : self_(self), obj_(object) {
+  CHECK(object.Get() != nullptr);
+  obj_->MonitorEnter(self_);
+}
+
+template <typename T>
+ObjectLock<T>::~ObjectLock() {
+  obj_->MonitorExit(self_);
+}
+
+template <typename T>
+void ObjectLock<T>::WaitIgnoringInterrupts() {
+  Monitor::Wait(self_, obj_.Get(), 0, 0, false, kWaiting);
+}
+
+template <typename T>
+void ObjectLock<T>::Notify() {
+  obj_->Notify(self_);
+}
+
+template <typename T>
+void ObjectLock<T>::NotifyAll() {
+  obj_->NotifyAll(self_);
+}
+
+template class ObjectLock<mirror::Class>;
+template class ObjectLock<mirror::Object>;
+
+}  // namespace art
diff --git a/runtime/object_lock.h b/runtime/object_lock.h
new file mode 100644
index 0000000..acddc03
--- /dev/null
+++ b/runtime/object_lock.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OBJECT_LOCK_H_
+#define ART_RUNTIME_OBJECT_LOCK_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "handle.h"
+
+namespace art {
+
+class Thread;
+
+template <typename T>
+class ObjectLock {
+ public:
+  ObjectLock(Thread* self, Handle<T> object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ~ObjectLock() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void WaitIgnoringInterrupts() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Notify() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void NotifyAll() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  Thread* const self_;
+  Handle<T> const obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ObjectLock);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OBJECT_LOCK_H_
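Keeping ObjectLock's member definitions in object_lock.cc works because the explicit instantiations at the bottom of that file emit code for the only two supported types, mirror::Class and mirror::Object. A standalone sketch of the same header/translation-unit split, with an invented ScopedLock/Widget pair:

#include <iostream>

// Declarations visible to all users (this part would live in a header).
template <typename T>
class ScopedLock {
 public:
  explicit ScopedLock(T* obj);
  ~ScopedLock();
 private:
  T* const obj_;
};

struct Widget {
  void Lock() { std::cout << "lock" << std::endl; }
  void Unlock() { std::cout << "unlock" << std::endl; }
};

// Definitions hidden in one translation unit (the object_lock.cc role).
template <typename T>
ScopedLock<T>::ScopedLock(T* obj) : obj_(obj) { obj_->Lock(); }

template <typename T>
ScopedLock<T>::~ScopedLock() { obj_->Unlock(); }

// Explicit instantiation: only the types listed here link successfully,
// mirroring the ObjectLock<mirror::Class> / ObjectLock<mirror::Object>
// lines at the end of object_lock.cc.
template class ScopedLock<Widget>;

int main() {
  Widget w;
  ScopedLock<Widget> lock(&w);  // Unlocks automatically at scope exit.
  return 0;
}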
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
deleted file mode 100644
index 28ce8f3..0000000
--- a/runtime/object_utils.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_OBJECT_UTILS_H_
-#define ART_RUNTIME_OBJECT_UTILS_H_
-
-#include "class_linker.h"
-#include "dex_file.h"
-#include "monitor.h"
-#include "mirror/art_field.h"
-#include "mirror/art_method.h"
-#include "mirror/class.h"
-#include "mirror/dex_cache.h"
-#include "mirror/iftable.h"
-#include "mirror/proxy.h"
-#include "mirror/string.h"
-
-#include "runtime.h"
-#include "handle_scope-inl.h"
-
-#include <string>
-
-namespace art {
-
-template <typename T>
-class ObjectLock {
- public:
-  ObjectLock(Thread* self, Handle<T> object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : self_(self), obj_(object) {
-    CHECK(object.Get() != nullptr);
-    obj_->MonitorEnter(self_);
-  }
-
-  ~ObjectLock() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->MonitorExit(self_);
-  }
-
-  void WaitIgnoringInterrupts() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Monitor::Wait(self_, obj_.Get(), 0, 0, false, kWaiting);
-  }
-
-  void Notify() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->Notify(self_);
-  }
-
-  void NotifyAll() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->NotifyAll(self_);
-  }
-
- private:
-  Thread* const self_;
-  Handle<T> const obj_;
-  DISALLOW_COPY_AND_ASSIGN(ObjectLock);
-};
-
-class FieldHelper {
- public:
-  explicit FieldHelper(Handle<mirror::ArtField> f) : field_(f) {}
-
-  void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_f != nullptr);
-    field_.Assign(new_f);
-  }
-
-  mirror::ArtField* GetField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return field_.Get();
-  }
-
-  mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
-                                                                   field_->GetTypeDescriptor());
-    }
-    const DexFile* dex_file = field_->GetDexFile();
-    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
-    mirror::Class* type = field_->GetDexCache()->GetResolvedType(field_id.type_idx_);
-    if (resolve && (type == nullptr)) {
-      type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, field_.Get());
-      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
-    }
-    return type;
-  }
-
-  // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
-  // If you need it longer, copy it into a std::string.
-  const char* GetDeclaringClassDescriptor()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      DCHECK(field_->IsStatic());
-      DCHECK_LT(field_index, 2U);
-      // 0 == Class[] interfaces; 1 == Class[][] throws;
-      declaring_class_descriptor_ = field_->GetDeclaringClass()->GetDescriptor();
-      return declaring_class_descriptor_.c_str();
-    }
-    const DexFile* dex_file = field_->GetDexFile();
-    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
-    return dex_file->GetFieldDeclaringClassDescriptor(field_id);
-  }
-
- private:
-  Handle<mirror::ArtField> field_;
-  std::string declaring_class_descriptor_;
-
-  DISALLOW_COPY_AND_ASSIGN(FieldHelper);
-};
-
-class MethodHelper {
- public:
-  explicit MethodHelper(Handle<mirror::ArtMethod> m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : method_(m), shorty_(nullptr), shorty_len_(0) {
-    SetMethod(m.Get());
-  }
-
-  void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_m != nullptr);
-    SetMethod(new_m);
-    shorty_ = nullptr;
-  }
-
-  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetInterfaceMethodIfProxy();
-  }
-
-  mirror::String* GetNameAsString(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile* dex_file = method_->GetDexFile();
-    mirror::ArtMethod* method = method_->GetInterfaceMethodIfProxy();
-    uint32_t dex_method_idx = method->GetDexMethodIndex();
-    const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
-    StackHandleScope<1> hs(self);
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-    return Runtime::Current()->GetClassLinker()->ResolveString(*dex_file, method_id.name_idx_,
-                                                               dex_cache);
-  }
-
-  const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* result = shorty_;
-    if (result == nullptr) {
-      result = method_->GetShorty(&shorty_len_);
-      shorty_ = result;
-    }
-    return result;
-  }
-
-  uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (shorty_ == nullptr) {
-      GetShorty();
-    }
-    return shorty_len_;
-  }
-
-  // Counts the number of references in the parameter list of the corresponding method.
-  // Note: Thus does _not_ include "this" for non-static methods.
-  uint32_t GetNumberOfReferenceArgsWithoutReceiver() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* shorty = GetShorty();
-    uint32_t refs = 0;
-    for (uint32_t i = 1; i < shorty_len_ ; ++i) {
-      if (shorty[i] == 'L') {
-        refs++;
-      }
-    }
-
-    return refs;
-  }
-
-  // May cause thread suspension due to GetClassFromTypeIdx calling ResolveType this caused a large
-  // number of bugs at call sites.
-  mirror::Class* GetReturnType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* method = GetMethod();
-    const DexFile* dex_file = method->GetDexFile();
-    const DexFile::MethodId& method_id = dex_file->GetMethodId(method->GetDexMethodIndex());
-    const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-    uint16_t return_type_idx = proto_id.return_type_idx_;
-    return GetClassFromTypeIdx(return_type_idx, resolve);
-  }
-
-  size_t NumArgs() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // "1 +" because the first in Args is the receiver.
-    // "- 1" because we don't count the return type.
-    return (method_->IsStatic() ? 0 : 1) + GetShortyLength() - 1;
-  }
-
-  // Get the primitive type associated with the given parameter.
-  Primitive::Type GetParamPrimitiveType(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK_LT(param, NumArgs());
-    if (GetMethod()->IsStatic()) {
-      param++;  // 0th argument must skip return value at start of the shorty
-    } else if (param == 0) {
-      return Primitive::kPrimNot;
-    }
-    return Primitive::GetType(GetShorty()[param]);
-  }
-
-  // Is the specified parameter a long or double, where parameter 0 is 'this' for instance methods.
-  bool IsParamALongOrDouble(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetParamPrimitiveType(param);
-    return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
-  }
-
-  // Is the specified parameter a reference, where parameter 0 is 'this' for instance methods.
-  bool IsParamAReference(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetParamPrimitiveType(param) == Primitive::kPrimNot;
-  }
-
-  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile* dex_file = method_->GetDexFile();
-    const DexFile::MethodId& mid = dex_file->GetMethodId(GetMethod()->GetDexMethodIndex());
-    if (method_->GetDexCache() == other->method_->GetDexCache()) {
-      const DexFile::MethodId& other_mid =
-          dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
-      return mid.name_idx_ == other_mid.name_idx_ && mid.proto_idx_ == other_mid.proto_idx_;
-    }
-    const DexFile* other_dex_file = other->method_->GetDexFile();
-    const DexFile::MethodId& other_mid =
-        other_dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
-    if (!DexFileStringEquals(dex_file, mid.name_idx_, other_dex_file, other_mid.name_idx_)) {
-      return false;  // Name mismatch.
-    }
-    return dex_file->GetMethodSignature(mid) == other_dex_file->GetMethodSignature(other_mid);
-  }
-
-  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
-      return false;
-    }
-    const DexFile::TypeList* types = method_->GetParameterTypeList();
-    const DexFile::TypeList* other_types = other->method_->GetParameterTypeList();
-    if (types == nullptr) {
-      return (other_types == nullptr) || (other_types->Size() == 0);
-    } else if (UNLIKELY(other_types == nullptr)) {
-      return types->Size() == 0;
-    }
-    uint32_t num_types = types->Size();
-    if (UNLIKELY(num_types != other_types->Size())) {
-      return false;
-    }
-    for (uint32_t i = 0; i < num_types; ++i) {
-      mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
-      mirror::Class* other_param_type =
-          other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
-      if (UNLIKELY(param_type != other_param_type)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* method = GetMethod();
-    mirror::Class* type = method->GetDexCacheResolvedTypes()->Get(type_idx);
-    if (type == nullptr && resolve) {
-      type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
-    }
-    return type;
-  }
-
-  mirror::Class* GetDexCacheResolvedType(uint16_t type_idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetMethod()->GetDexCacheResolvedTypes()->Get(type_idx);
-  }
-
-  mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* method = GetMethod();
-    mirror::String* s = method->GetDexCacheStrings()->Get(string_idx);
-    if (UNLIKELY(s == nullptr)) {
-      StackHandleScope<1> hs(Thread::Current());
-      Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-      s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx,
-                                                              dex_cache);
-    }
-    return s;
-  }
-
-  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* method = GetMethod();
-    const DexFile* dexfile = method->GetDexFile();
-    if (dexfile == &other_dexfile) {
-      return method->GetDexMethodIndex();
-    }
-    const DexFile::MethodId& mid = dexfile->GetMethodId(method->GetDexMethodIndex());
-    const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-    const DexFile::StringId* other_descriptor =
-        other_dexfile.FindStringId(mid_declaring_class_descriptor);
-    if (other_descriptor != nullptr) {
-      const DexFile::TypeId* other_type_id =
-          other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-      if (other_type_id != nullptr) {
-        const char* mid_name = dexfile->GetMethodName(mid);
-        const DexFile::StringId* other_name = other_dexfile.FindStringId(mid_name);
-        if (other_name != nullptr) {
-          uint16_t other_return_type_idx;
-          std::vector<uint16_t> other_param_type_idxs;
-          bool success = other_dexfile.CreateTypeList(
-              dexfile->GetMethodSignature(mid).ToString(), &other_return_type_idx,
-              &other_param_type_idxs);
-          if (success) {
-            const DexFile::ProtoId* other_sig =
-                other_dexfile.FindProtoId(other_return_type_idx, other_param_type_idxs);
-            if (other_sig != nullptr) {
-              const  DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-                  *other_type_id, *other_name, *other_sig);
-              if (other_mid != nullptr) {
-                return other_dexfile.GetIndexForMethodId(*other_mid);
-              }
-            }
-          }
-        }
-      }
-    }
-    return DexFile::kDexNoIndex;
-  }
-
-  // The name_and_signature_idx MUST point to a MethodId with the same name and signature in the
-  // other_dexfile, such as the method index used to resolve this method in the other_dexfile.
-  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
-                                            uint32_t name_and_signature_idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* method = GetMethod();
-    const DexFile* dexfile = method->GetDexFile();
-    const uint32_t dex_method_idx = method->GetDexMethodIndex();
-    const DexFile::MethodId& mid = dexfile->GetMethodId(dex_method_idx);
-    const DexFile::MethodId& name_and_sig_mid = other_dexfile.GetMethodId(name_and_signature_idx);
-    DCHECK_STREQ(dexfile->GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
-    DCHECK_EQ(dexfile->GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
-    if (dexfile == &other_dexfile) {
-      return dex_method_idx;
-    }
-    const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-    const DexFile::StringId* other_descriptor =
-        other_dexfile.FindStringId(mid_declaring_class_descriptor);
-    if (other_descriptor != nullptr) {
-      const DexFile::TypeId* other_type_id =
-          other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-      if (other_type_id != nullptr) {
-        const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-            *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
-            other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
-        if (other_mid != nullptr) {
-          return other_dexfile.GetIndexForMethodId(*other_mid);
-        }
-      }
-    }
-    return DexFile::kDexNoIndex;
-  }
-
- private:
-  // Set the method_ field, for proxy methods looking up the interface method via the resolved
-  // methods table.
-  void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    method_.Assign(method);
-  }
-
-  Handle<mirror::ArtMethod> method_;
-  const char* shorty_;
-  uint32_t shorty_len_;
-
-  DISALLOW_COPY_AND_ASSIGN(MethodHelper);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_OBJECT_UTILS_H_
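A convention worth noting from the deleted MethodHelper: a method shorty puts the return type at index 0 and one character per parameter after it, with 'L' marking a reference and the receiver never included. A standalone sketch mirroring the removed GetNumberOfReferenceArgsWithoutReceiver():

#include <cstdint>
#include <cstring>
#include <iostream>

// Counts reference ('L') parameters in a shorty such as "VLIL": index 0 is
// the return type, so scanning starts at 1, and the receiver ("this") is
// never part of the shorty.
static uint32_t CountReferenceArgs(const char* shorty) {
  uint32_t refs = 0;
  for (size_t i = 1; i < std::strlen(shorty); ++i) {
    if (shorty[i] == 'L') {
      refs++;
    }
  }
  return refs;
}

int main() {
  std::cout << CountReferenceArgs("VLIL") << std::endl;  // Prints 2.
  return 0;
}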
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index e1e133f..9a1d0f7 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -24,11 +24,13 @@
 #include "debugger.h"
 #include "gc/heap.h"
 #include "monitor.h"
+#include "runtime.h"
+#include "trace.h"
 #include "utils.h"
 
 namespace art {
 
-ParsedOptions* ParsedOptions::Create(const Runtime::Options& options, bool ignore_unrecognized) {
+ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
   std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
   if (parsed->Parse(options, ignore_unrecognized)) {
     return parsed.release();
@@ -164,7 +166,7 @@
   return true;
 }
 
-bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecognized) {
+bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized) {
   const char* boot_class_path_string = getenv("BOOTCLASSPATH");
   if (boot_class_path_string != NULL) {
     boot_class_path_string_ = boot_class_path_string;
@@ -197,13 +199,18 @@
 #else
 #error "ART default GC type must be set"
 #endif
+  // If we are using homogeneous space compaction, default background compaction to off, since we
+  // already perform a homogeneous space compaction when we transition to not jank perceptible.
+  use_homogeneous_space_compaction_for_oom_ = false;
   // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
-  // parsing options.
+  // parsing options. If set to kCollectorTypeHomogeneousSpaceCompact, we instead do a homogeneous
+  // space compaction when we transition to background.
   background_collector_type_ = gc::kCollectorTypeSS;
   stack_size_ = 0;  // 0 means default.
   max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   low_memory_mode_ = false;
   use_tlab_ = false;
+  min_interval_homogeneous_space_compaction_by_oom_ = MsToNs(100 * 1000);  // 100s.
   verify_pre_gc_heap_ = false;
   // Pre sweeping is the one that usually fails if the GC corrupted the heap.
   verify_pre_sweeping_heap_ = kIsDebugBuild;
@@ -253,43 +260,11 @@
   method_trace_file_ = "/data/method-trace-file.bin";
   method_trace_file_size_ = 10 * MB;
 
-  profile_clock_source_ = kDefaultProfilerClockSource;
+  profile_clock_source_ = kDefaultTraceClockSource;
 
   verify_ = true;
   image_isa_ = kRuntimeISA;
 
-  // Default to explicit checks.  Switch off with -implicit-checks:.
-  // or setprop dalvik.vm.implicit_checks check1,check2,...
-#ifdef HAVE_ANDROID_OS
-  {
-    char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.implicit_checks", buf, "null,stack");
-    std::string checks(buf);
-    std::vector<std::string> checkvec;
-    Split(checks, ',', checkvec);
-    explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-        kExplicitStackOverflowCheck;
-    for (auto& str : checkvec) {
-      std::string val = Trim(str);
-      if (val == "none") {
-        explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-          kExplicitStackOverflowCheck;
-      } else if (val == "null") {
-        explicit_checks_ &= ~kExplicitNullCheck;
-      } else if (val == "suspend") {
-        explicit_checks_ &= ~kExplicitSuspendCheck;
-      } else if (val == "stack") {
-        explicit_checks_ &= ~kExplicitStackOverflowCheck;
-      } else if (val == "all") {
-        explicit_checks_ = 0;
-      }
-    }
-  }
-#else
-  explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-    kExplicitStackOverflowCheck;
-#endif
-
   for (size_t i = 0; i < options.size(); ++i) {
     if (options[0].first == "-Xzygote") {
       LOG(INFO) << "option[" << i << "]=" << options[i].first;
@@ -305,6 +280,7 @@
       Exit(0);
     } else if (StartsWith(option, "-Xbootclasspath:")) {
       boot_class_path_string_ = option.substr(strlen("-Xbootclasspath:")).data();
+      LOG(INFO) << "setting boot class path to " << boot_class_path_string_;
     } else if (option == "-classpath" || option == "-cp") {
       // TODO: support -Djava.class.path
       i++;
@@ -416,6 +392,10 @@
       low_memory_mode_ = true;
     } else if (option == "-XX:UseTLAB") {
       use_tlab_ = true;
+    } else if (option == "-XX:EnableHSpaceCompactForOOM") {
+      use_homogeneous_space_compaction_for_oom_ = true;
+    } else if (option == "-XX:DisableHSpaceCompactForOOM") {
+      use_homogeneous_space_compaction_for_oom_ = false;
     } else if (StartsWith(option, "-D")) {
       properties_.push_back(option.substr(strlen("-D")));
     } else if (StartsWith(option, "-Xjnitrace:")) {
@@ -439,12 +419,17 @@
       if (!ParseStringAfterChar(option, '=', &substring)) {
         return false;
       }
-      gc::CollectorType collector_type = ParseCollectorType(substring);
-      if (collector_type != gc::kCollectorTypeNone) {
-        background_collector_type_ = collector_type;
+      // Special handling for HSpaceCompact since this is only valid as a background GC type.
+      if (substring == "HSpaceCompact") {
+        background_collector_type_ = gc::kCollectorTypeHomogeneousSpaceCompact;
       } else {
-        Usage("Unknown -XX:BackgroundGC option %s\n", substring.c_str());
-        return false;
+        gc::CollectorType collector_type = ParseCollectorType(substring);
+        if (collector_type != gc::kCollectorTypeNone) {
+          background_collector_type_ = collector_type;
+        } else {
+          Usage("Unknown -XX:BackgroundGC option %s\n", substring.c_str());
+          return false;
+        }
       }
     } else if (option == "-XX:+DisableExplicitGC") {
       is_explicit_gc_disabled_ = true;
@@ -528,11 +513,11 @@
         return false;
       }
     } else if (option == "-Xprofile:threadcpuclock") {
-      Trace::SetDefaultClockSource(kProfilerClockSourceThreadCpu);
+      Trace::SetDefaultClockSource(kTraceClockSourceThreadCpu);
     } else if (option == "-Xprofile:wallclock") {
-      Trace::SetDefaultClockSource(kProfilerClockSourceWall);
+      Trace::SetDefaultClockSource(kTraceClockSourceWall);
     } else if (option == "-Xprofile:dualclock") {
-      Trace::SetDefaultClockSource(kProfilerClockSourceDual);
+      Trace::SetDefaultClockSource(kTraceClockSourceDual);
     } else if (option == "-Xenable-profiler") {
       profiler_options_.enabled_ = true;
     } else if (StartsWith(option, "-Xprofile-filename:")) {
@@ -573,54 +558,6 @@
       if (!ParseUnsignedInteger(option, ':', &profiler_options_.max_stack_depth_)) {
         return false;
       }
-    } else if (StartsWith(option, "-implicit-checks:")) {
-      std::string checks;
-      if (!ParseStringAfterChar(option, ':', &checks)) {
-        return false;
-      }
-      std::vector<std::string> checkvec;
-      Split(checks, ',', checkvec);
-      for (auto& str : checkvec) {
-        std::string val = Trim(str);
-        if (val == "none") {
-          explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-            kExplicitStackOverflowCheck;
-        } else if (val == "null") {
-          explicit_checks_ &= ~kExplicitNullCheck;
-        } else if (val == "suspend") {
-          explicit_checks_ &= ~kExplicitSuspendCheck;
-        } else if (val == "stack") {
-          explicit_checks_ &= ~kExplicitStackOverflowCheck;
-        } else if (val == "all") {
-          explicit_checks_ = 0;
-        } else {
-            return false;
-        }
-      }
-    } else if (StartsWith(option, "-explicit-checks:")) {
-      std::string checks;
-      if (!ParseStringAfterChar(option, ':', &checks)) {
-        return false;
-      }
-      std::vector<std::string> checkvec;
-      Split(checks, ',', checkvec);
-      for (auto& str : checkvec) {
-        std::string val = Trim(str);
-        if (val == "none") {
-          explicit_checks_ = 0;
-        } else if (val == "null") {
-          explicit_checks_ |= kExplicitNullCheck;
-        } else if (val == "suspend") {
-          explicit_checks_ |= kExplicitSuspendCheck;
-        } else if (val == "stack") {
-          explicit_checks_ |= kExplicitStackOverflowCheck;
-        } else if (val == "all") {
-          explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-            kExplicitStackOverflowCheck;
-        } else {
-          return false;
-        }
-      }
     } else if (StartsWith(option, "-Xcompiler:")) {
       if (!ParseStringAfterChar(option, ':', &compiler_executable_)) {
         return false;
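The -XX:BackgroundGC parsing above special-cases "HSpaceCompact" before the generic collector-type lookup, since homogeneous space compaction is only valid as a background collector. A standalone model of that control flow, with a trimmed-down enum and parser invented for the sketch:

#include <iostream>
#include <string>

// Trimmed-down stand-ins for gc::CollectorType and ParseCollectorType().
enum CollectorType {
  kCollectorTypeNone,
  kCollectorTypeSS,
  kCollectorTypeHomogeneousSpaceCompact,
};

static CollectorType ParseCollectorType(const std::string& s) {
  return (s == "SS") ? kCollectorTypeSS : kCollectorTypeNone;
}

// HSpaceCompact is handled before the generic lookup because it is only
// meaningful as a background collector.
static bool ParseBackgroundGc(const std::string& substring, CollectorType* out) {
  if (substring == "HSpaceCompact") {
    *out = kCollectorTypeHomogeneousSpaceCompact;
    return true;
  }
  CollectorType collector_type = ParseCollectorType(substring);
  if (collector_type == kCollectorTypeNone) {
    std::cerr << "Unknown -XX:BackgroundGC option " << substring << std::endl;
    return false;
  }
  *out = collector_type;
  return true;
}

int main() {
  CollectorType type = kCollectorTypeNone;
  bool ok = ParseBackgroundGc("HSpaceCompact", &type);
  return (ok && type == kCollectorTypeHomogeneousSpaceCompact) ? 0 : 1;
}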
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index d0f3c12..23f2bcf 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -18,17 +18,26 @@
 #define ART_RUNTIME_PARSED_OPTIONS_H_
 
 #include <string>
+#include <vector>
 
+#include <jni.h>
+
+#include "globals.h"
 #include "gc/collector_type.h"
-#include "runtime.h"
-#include "trace.h"
+#include "instruction_set.h"
+#include "profiler_options.h"
 
 namespace art {
 
+class CompilerCallbacks;
+class DexFile;
+
+typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
+
 class ParsedOptions {
  public:
   // returns null if problem parsing and ignore_unrecognized is false
-  static ParsedOptions* Create(const Runtime::Options& options, bool ignore_unrecognized);
+  static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized);
 
   const std::vector<const DexFile*>* boot_class_path_;
   std::string boot_class_path_string_;
@@ -80,14 +89,16 @@
   std::vector<std::string> image_compiler_options_;
   ProfilerOptions profiler_options_;
   std::string profile_output_filename_;
-  ProfilerClockSource profile_clock_source_;
+  TraceClockSource profile_clock_source_;
   bool verify_;
   InstructionSet image_isa_;
 
-  static constexpr uint32_t kExplicitNullCheck = 1;
-  static constexpr uint32_t kExplicitSuspendCheck = 2;
-  static constexpr uint32_t kExplicitStackOverflowCheck = 4;
-  uint32_t explicit_checks_;
+  // Whether or not we use homogeneous space compaction to avoid OOM errors. If enabled,
+  // the heap will attempt to create an extra space which enables compacting from a malloc space to
+  // another malloc space when we are about to throw OOM.
+  bool use_homogeneous_space_compaction_for_oom_;
+  // Minimal interval allowed between two homogeneous space compactions caused by OOM.
+  uint64_t min_interval_homogeneous_space_compaction_by_oom_;
 
  private:
   ParsedOptions() {}
@@ -99,7 +110,7 @@
   void Exit(int status);
   void Abort();
 
-  bool Parse(const Runtime::Options& options,  bool ignore_unrecognized);
+  bool Parse(const RuntimeOptions& options, bool ignore_unrecognized);
   bool ParseXGcOption(const std::string& option);
   bool ParseStringAfterChar(const std::string& option, char after_char, std::string* parsed_value);
   bool ParseInteger(const std::string& option, char after_char, int* parsed_value);
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index b58a29c..5154d69 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -36,7 +36,7 @@
   boot_class_path += "-Xbootclasspath:";
   boot_class_path += lib_core;
 
-  Runtime::Options options;
+  RuntimeOptions options;
   options.push_back(std::make_pair(boot_class_path.c_str(), null));
   options.push_back(std::make_pair("-classpath", null));
   options.push_back(std::make_pair(lib_core.c_str(), null));
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 7a7a92a..9514448 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -32,7 +32,6 @@
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 093c129..bd6656d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -14,12 +14,14 @@
  * limitations under the License.
  */
 
-#include "common_compiler_test.h"
-#include "mirror/art_field-inl.h"
-
 #include <jni.h>
 #include <vector>
 
+#include "common_compiler_test.h"
+#include "field_helper.h"
+#include "mirror/art_field-inl.h"
+#include "scoped_thread_state_change.h"
+
 namespace art {
 
 class ProxyTest : public CommonCompilerTest {
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 5128b19..982553d 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -48,6 +48,7 @@
   kIntrinsicMinMaxFloat,
   kIntrinsicMinMaxDouble,
   kIntrinsicSqrt,
+  kIntrinsicGet,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
   kIntrinsicIsEmptyOrLength,
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1034923..6581f9b 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -16,10 +16,14 @@
 
 #include "quick_exception_handler.h"
 
+#include "arch/context.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "handle_scope-inl.h"
 #include "mirror/art_method-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/throwable.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index f4af61f..1385c60 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -35,4 +35,9 @@
 #error "Only one of Baker or Brooks can be enabled at a time."
 #endif
 
+// A placeholder marker to indicate places to add read barriers in the
+// assembly code. This is a development time aid and to be removed
+// after read barriers are added.
+#define THIS_LOAD_REQUIRES_READ_BARRIER
+
 #endif  // ART_RUNTIME_READ_BARRIER_C_H_
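Because THIS_LOAD_REQUIRES_READ_BARRIER expands to nothing, it compiles away entirely and only keeps the to-be-barriered loads grep-able. A standalone illustration of such an empty marker macro, with the Object/LoadField names invented:

// The marker expands to nothing, so it costs nothing at runtime; it exists
// purely to tag the spots that still need read barriers.
#define THIS_LOAD_REQUIRES_READ_BARRIER

struct Object {
  int field;
};

static int LoadField(Object* obj) {
  THIS_LOAD_REQUIRES_READ_BARRIER
  return obj->field;  // A future read barrier would wrap this load.
}

int main() {
  Object o = {7};
  return (LoadField(&o) == 7) ? 0 : 1;
}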
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index 3229039..d2877f9 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -17,6 +17,10 @@
 #include "reference_table.h"
 
 #include "common_runtime_test.h"
+#include "mirror/array.h"
+#include "mirror/string.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index fe5e104..758c1bb 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -20,14 +20,14 @@
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
+#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/class.h"
 #include "mirror/class-inl.h"
-#include "mirror/object_array.h"
+#include "mirror/class.h"
 #include "mirror/object_array-inl.h"
+#include "mirror/object_array.h"
 #include "nth_caller_visitor.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
 #include "well_known_classes.h"
@@ -567,6 +567,11 @@
   return true;
 }
 
+static std::string PrettyDescriptor(Primitive::Type type) {
+  std::string descriptor_string(Primitive::Descriptor(type));
+  return PrettyDescriptor(descriptor_string);
+}
+
 bool ConvertPrimitiveValue(const ThrowLocation* throw_location, bool unbox_for_result,
                            Primitive::Type srcType, Primitive::Type dstType,
                            const JValue& src, JValue* dst) {
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 3b66abe..9d10daa 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -18,9 +18,11 @@
 
 #include <float.h>
 #include <limits.h>
+#include "ScopedLocalRef.h"
 
 #include "common_compiler_test.h"
 #include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
@@ -109,7 +111,16 @@
                         : c->FindVirtualMethod(method_name, method_signature);
     CHECK(method != nullptr);
 
-    *receiver = (is_static ? nullptr : c->AllocObject(self));
+    if (is_static) {
+      *receiver = nullptr;
+    } else {
+      // Ensure the class is initialized before allocating an object.
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(c));
+      bool initialized = class_linker_->EnsureInitialized(h_class, true, true);
+      CHECK(initialized);
+      *receiver = c->AllocObject(self);
+    }
 
     // Start runtime.
     bool started = runtime_->Start();
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 29ddd1d..f776bcd 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -19,24 +19,56 @@
 
 #include "runtime.h"
 
+#include "read_barrier-inl.h"
+
 namespace art {
 
-inline QuickMethodFrameInfo Runtime::GetRuntimeMethodFrameInfo(mirror::ArtMethod* method) const {
+inline QuickMethodFrameInfo Runtime::GetRuntimeMethodFrameInfo(mirror::ArtMethod* method) {
   DCHECK(method != nullptr);
   // Cannot be imt-conflict-method or resolution-method.
   DCHECK(method != GetImtConflictMethod());
   DCHECK(method != GetResolutionMethod());
   // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
-  if (method == callee_save_methods_[Runtime::kRefsAndArgs]) {
+  if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsAndArgs)) {
     return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  } else if (method == callee_save_methods_[Runtime::kSaveAll]) {
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) {
     return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
   } else {
-    DCHECK(method == callee_save_methods_[Runtime::kRefsOnly]);
+    DCHECK(method == GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly));
     return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
   }
 }
 
+inline mirror::ArtMethod* Runtime::GetResolutionMethod() {
+  CHECK(HasResolutionMethod());
+  return ReadBarrier::BarrierForRoot<mirror::ArtMethod, kWithReadBarrier>(&resolution_method_);
+}
+
+inline mirror::ArtMethod* Runtime::GetImtConflictMethod() {
+  CHECK(HasImtConflictMethod());
+  return ReadBarrier::BarrierForRoot<mirror::ArtMethod, kWithReadBarrier>(&imt_conflict_method_);
+}
+
+inline mirror::ObjectArray<mirror::ArtMethod>* Runtime::GetDefaultImt()
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  CHECK(HasDefaultImt());
+  return ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::ArtMethod>, kWithReadBarrier>(
+      &default_imt_);
+}
+
+inline mirror::ArtMethod* Runtime::GetCalleeSaveMethod(CalleeSaveType type)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(HasCalleeSaveMethod(type));
+  return ReadBarrier::BarrierForRoot<mirror::ArtMethod, kWithReadBarrier>(
+      &callee_save_methods_[type]);
+}
+
+inline mirror::ArtMethod* Runtime::GetCalleeSaveMethodUnchecked(CalleeSaveType type)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return ReadBarrier::BarrierForRoot<mirror::ArtMethod, kWithReadBarrier>(
+      &callee_save_methods_[type]);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_RUNTIME_INL_H_
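The new Runtime accessors above all share one shape: assert that the root exists, then read it through ReadBarrier::BarrierForRoot so a moving collector can return the relocated copy. A simplified standalone model of that shape, in which the barrier merely forwards the pointer (no real GC) and ArtMethod is a stub:

#include <cassert>

// Toy stand-in for ReadBarrier::BarrierForRoot: here it simply forwards
// the pointer, where a concurrent-copying collector would instead return
// the to-space copy of the root.
template <typename MirrorType>
static MirrorType* BarrierForRoot(MirrorType** root) {
  return *root;
}

struct ArtMethod {};  // Stub for mirror::ArtMethod.

struct Runtime {
  ArtMethod* resolution_method_ = nullptr;

  bool HasResolutionMethod() const { return resolution_method_ != nullptr; }

  // Every read of the root funnels through the barrier, matching the shape
  // of Runtime::GetResolutionMethod() in the hunk above.
  ArtMethod* GetResolutionMethod() {
    assert(HasResolutionMethod());
    return BarrierForRoot(&resolution_method_);
  }
};

int main() {
  Runtime runtime;
  ArtMethod method;
  runtime.resolution_method_ = &method;
  return (runtime.GetResolutionMethod() == &method) ? 0 : 1;
}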
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index efa205e..aca2607 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -49,6 +49,7 @@
 #include "fault_handler.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
+#include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "image.h"
 #include "instrumentation.h"
@@ -140,7 +141,10 @@
       suspend_handler_(nullptr),
       stack_overflow_handler_(nullptr),
       verify_(false),
-      target_sdk_version_(0) {
+      target_sdk_version_(0),
+      implicit_null_checks_(false),
+      implicit_so_checks_(false),
+      implicit_suspend_checks_(false) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = nullptr;
   }
@@ -324,7 +328,7 @@
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
 }
 
-bool Runtime::Create(const Options& options, bool ignore_unrecognized) {
+bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != NULL) {
     return false;
@@ -534,7 +538,7 @@
   VLOG(startup) << "Runtime::StartDaemonThreads exiting";
 }
 
-bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
+bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
   std::unique_ptr<ParsedOptions> options(ParsedOptions::Create(raw_options, ignore_unrecognized));
@@ -580,41 +584,6 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
-  bool implicit_checks_supported = false;
-  switch (kRuntimeISA) {
-    case kArm:
-    case kThumb2:
-      implicit_checks_supported = true;
-      break;
-    default:
-      break;
-  }
-
-  if (!options->interpreter_only_ && implicit_checks_supported &&
-      (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
-          ParsedOptions::kExplicitNullCheck |
-          ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
-    fault_manager.Init();
-
-    // These need to be in a specific order.  The null point check handler must be
-    // after the suspend check and stack overflow check handlers.
-    if ((options->explicit_checks_ & ParsedOptions::kExplicitSuspendCheck) == 0) {
-      suspend_handler_ = new SuspensionHandler(&fault_manager);
-    }
-
-    if ((options->explicit_checks_ & ParsedOptions::kExplicitStackOverflowCheck) == 0) {
-      stack_overflow_handler_ = new StackOverflowHandler(&fault_manager);
-    }
-
-    if ((options->explicit_checks_ & ParsedOptions::kExplicitNullCheck) == 0) {
-      null_pointer_handler_ = new NullPointerHandler(&fault_manager);
-    }
-
-    if (kEnableJavaStackTraceHandler) {
-      new JavaStackTraceHandler(&fault_manager);
-    }
-  }
-
   heap_ = new gc::Heap(options->heap_initial_size_,
                        options->heap_growth_limit_,
                        options->heap_min_free_,
@@ -638,13 +607,51 @@
                        options->verify_post_gc_heap_,
                        options->verify_pre_gc_rosalloc_,
                        options->verify_pre_sweeping_rosalloc_,
-                       options->verify_post_gc_rosalloc_);
+                       options->verify_post_gc_rosalloc_,
+                       options->use_homogeneous_space_compaction_for_oom_,
+                       options->min_interval_homogeneous_space_compaction_by_oom_);
 
   dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
 
   BlockSignals();
   InitPlatformSignalHandlers();
 
+  // Change the implicit checks flags based on runtime architecture.
+  switch (kRuntimeISA) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+      implicit_null_checks_ = true;
+      implicit_so_checks_ = true;
+      break;
+    default:
+      // Keep the defaults.
+      break;
+  }
+
+  if (!options->interpreter_only_ &&
+    (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_)) {
+    fault_manager.Init();
+
+    // These need to be in a specific order.  The null pointer check handler must be
+    // after the suspend check and stack overflow check handlers.
+    if (implicit_suspend_checks_) {
+      suspend_handler_ = new SuspensionHandler(&fault_manager);
+    }
+
+    if (implicit_so_checks_) {
+      stack_overflow_handler_ = new StackOverflowHandler(&fault_manager);
+    }
+
+    if (implicit_null_checks_) {
+      null_pointer_handler_ = new NullPointerHandler(&fault_manager);
+    }
+
+    if (kEnableJavaStackTraceHandler) {
+      new JavaStackTraceHandler(&fault_manager);
+    }
+  }
+
   java_vm_ = new JavaVMExt(this, options.get());
 
   Thread::Startup();
@@ -908,11 +915,13 @@
   thread_list_->Unregister(self);
 }
 
-  mirror::Throwable* Runtime::GetPreAllocatedOutOfMemoryError() const {
-  if (pre_allocated_OutOfMemoryError_ == NULL) {
+mirror::Throwable* Runtime::GetPreAllocatedOutOfMemoryError() {
+  mirror::Throwable* oome = ReadBarrier::BarrierForRoot<mirror::Throwable, kWithReadBarrier>(
+      &pre_allocated_OutOfMemoryError_);
+  if (oome == NULL) {
     LOG(ERROR) << "Failed to return pre-allocated OOME";
   }
-  return pre_allocated_OutOfMemoryError_;
+  return oome;
 }
 
 void Runtime::VisitConstantRoots(RootCallback* callback, void* arg) {
@@ -921,6 +930,7 @@
   mirror::ArtField::VisitRoots(callback, arg);
   mirror::ArtMethod::VisitRoots(callback, arg);
   mirror::Class::VisitRoots(callback, arg);
+  mirror::Reference::VisitRoots(callback, arg);
   mirror::StackTraceElement::VisitRoots(callback, arg);
   mirror::String::VisitRoots(callback, arg);
   mirror::Throwable::VisitRoots(callback, arg);
@@ -1013,8 +1023,8 @@
     method->SetEntryPointFromPortableCompiledCode(nullptr);
     method->SetEntryPointFromQuickCompiledCode(nullptr);
   } else {
-    method->SetEntryPointFromPortableCompiledCode(GetPortableImtConflictTrampoline(class_linker));
-    method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictTrampoline(class_linker));
+    method->SetEntryPointFromPortableCompiledCode(class_linker->GetPortableImtConflictTrampoline());
+    method->SetEntryPointFromQuickCompiledCode(class_linker->GetQuickImtConflictTrampoline());
   }
   return method.Get();
 }
@@ -1033,8 +1043,8 @@
     method->SetEntryPointFromPortableCompiledCode(nullptr);
     method->SetEntryPointFromQuickCompiledCode(nullptr);
   } else {
-    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(class_linker));
-    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(class_linker));
+    method->SetEntryPointFromPortableCompiledCode(class_linker->GetPortableResolutionTrampoline());
+    method->SetEntryPointFromQuickCompiledCode(class_linker->GetQuickResolutionTrampoline());
   }
   return method.Get();
 }
@@ -1216,37 +1226,6 @@
     argv->push_back("--compiler-filter=interpret-only");
   }
 
-  argv->push_back("--runtime-arg");
-  std::string checkstr = "-implicit-checks";
-
-  int nchecks = 0;
-  char checksep = ':';
-
-  if (!ExplicitNullChecks()) {
-    checkstr += checksep;
-    checksep = ',';
-    checkstr += "null";
-    ++nchecks;
-  }
-  if (!ExplicitSuspendChecks()) {
-    checkstr += checksep;
-    checksep = ',';
-    checkstr += "suspend";
-    ++nchecks;
-  }
-
-  if (!ExplicitStackOverflowChecks()) {
-    checkstr += checksep;
-    checksep = ',';
-    checkstr += "stack";
-    ++nchecks;
-  }
-
-  if (nchecks == 0) {
-    checkstr += ":none";
-  }
-  argv->push_back(checkstr);
-
   // Make the dex2oat instruction set match that of the launching runtime. If we have multiple
   // architecture support, dex2oat may be compiled as a different instruction-set than that
   // currently being executed.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index f839be1..284e4ff 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -68,6 +68,8 @@
 class Trace;
 class Transaction;
 
+typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
+
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
 // roots (also no logical reason to do this).
@@ -81,10 +83,8 @@
 
 class Runtime {
  public:
-  typedef std::vector<std::pair<std::string, const void*>> Options;
-
   // Creates and initializes a new runtime.
-  static bool Create(const Options& options, bool ignore_unrecognized)
+  static bool Create(const RuntimeOptions& options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
 
   bool IsCompiler() const {
@@ -219,8 +219,7 @@
     return monitor_pool_;
   }
 
-  mirror::Throwable* GetPreAllocatedOutOfMemoryError() const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Throwable* GetPreAllocatedOutOfMemoryError() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const std::vector<std::string>& GetProperties() const {
     return properties_;
@@ -266,13 +265,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
-  mirror::ArtMethod* GetResolutionMethod() const {
-    CHECK(HasResolutionMethod());
-    return resolution_method_;
-  }
+  mirror::ArtMethod* GetResolutionMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasResolutionMethod() const {
-    return resolution_method_ != NULL;
+    return resolution_method_ != nullptr;
   }
 
   void SetResolutionMethod(mirror::ArtMethod* method) {
@@ -281,14 +277,11 @@
 
   mirror::ArtMethod* CreateResolutionMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Returns a special method that calls into a trampoline for runtime imt conflicts
-  mirror::ArtMethod* GetImtConflictMethod() const {
-    CHECK(HasImtConflictMethod());
-    return imt_conflict_method_;
-  }
+  // Returns a special method that calls into a trampoline for runtime imt conflicts.
+  mirror::ArtMethod* GetImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasImtConflictMethod() const {
-    return imt_conflict_method_ != NULL;
+    return imt_conflict_method_ != nullptr;
   }
 
   void SetImtConflictMethod(mirror::ArtMethod* method) {
@@ -298,13 +291,11 @@
   mirror::ArtMethod* CreateImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns an imt with every entry set to conflict, used as default imt for all classes.
-  mirror::ObjectArray<mirror::ArtMethod>* GetDefaultImt() const {
-    CHECK(HasDefaultImt());
-    return default_imt_;
-  }
+  mirror::ObjectArray<mirror::ArtMethod>* GetDefaultImt()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasDefaultImt() const {
-    return default_imt_ != NULL;
+    return default_imt_ != nullptr;
   }
 
   void SetDefaultImt(mirror::ObjectArray<mirror::ArtMethod>* imt) {
@@ -326,16 +317,18 @@
     return callee_save_methods_[type] != NULL;
   }
 
-  mirror::ArtMethod* GetCalleeSaveMethod(CalleeSaveType type) const {
-    DCHECK(HasCalleeSaveMethod(type));
-    return callee_save_methods_[type];
-  }
+  mirror::ArtMethod* GetCalleeSaveMethod(CalleeSaveType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::ArtMethod* GetCalleeSaveMethodUnchecked(CalleeSaveType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   QuickMethodFrameInfo GetCalleeSaveMethodFrameInfo(CalleeSaveType type) const {
     return callee_save_method_frame_infos_[type];
   }
 
-  QuickMethodFrameInfo GetRuntimeMethodFrameInfo(mirror::ArtMethod* method) const;
+  QuickMethodFrameInfo GetRuntimeMethodFrameInfo(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static size_t GetCalleeSaveMethodOffset(CalleeSaveType type) {
     return OFFSETOF_MEMBER(Runtime, callee_save_methods_[type]);
@@ -463,7 +456,7 @@
 
   void BlockSignals();
 
-  bool Init(const Options& options, bool ignore_unrecognized)
+  bool Init(const RuntimeOptions& options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
   void InitNativeMethods() LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitThreadGroups(Thread* self);
@@ -596,6 +589,11 @@
   // Specifies target SDK version to allow workarounds for certain API levels.
   int32_t target_sdk_version_;
 
+  // Implicit checks flags.
+  bool implicit_null_checks_;       // Null pointer checks are implicit.
+  bool implicit_so_checks_;         // Stack overflow checks are implicit.
+  bool implicit_suspend_checks_;    // Thread suspension checks are implicit.
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d5405fb..71e566e 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -16,13 +16,13 @@
 
 #include "stack.h"
 
+#include "arch/context.h"
 #include "base/hex_dump.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime.h"
 #include "thread.h"
@@ -366,9 +366,10 @@
   return result;
 }
 
-instrumentation::InstrumentationStackFrame& StackVisitor::GetInstrumentationStackFrame(uint32_t depth) const {
-  CHECK_LT(depth, thread_->GetInstrumentationStack()->size());
-  return thread_->GetInstrumentationStack()->at(depth);
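+// Now a file-local helper, so that stack.h no longer needs to include instrumentation.h.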
+static instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(Thread* thread,
+                                                                                uint32_t depth) {
+  CHECK_LT(depth, thread->GetInstrumentationStack()->size());
+  return thread->GetInstrumentationStack()->at(depth);
 }
 
 void StackVisitor::SanityCheckFrame() const {
@@ -431,7 +432,7 @@
           // the stack for an exception where the side stack will be unwound in VisitFrame.
           if (GetQuickInstrumentationExitPc() == return_pc) {
             const instrumentation::InstrumentationStackFrame& instrumentation_frame =
-                GetInstrumentationStackFrame(instrumentation_stack_depth);
+                GetInstrumentationStackFrame(thread_, instrumentation_stack_depth);
             instrumentation_stack_depth++;
             if (GetMethod() == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAll)) {
               // Skip runtime save all callee frames which are used to deliver exceptions.
diff --git a/runtime/stack.h b/runtime/stack.h
index 9402cdd..ef498ef 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -17,20 +17,16 @@
 #ifndef ART_RUNTIME_STACK_H_
 #define ART_RUNTIME_STACK_H_
 
-#include "dex_file.h"
-#include "instrumentation.h"
-#include "arch/context.h"
-#include "base/casts.h"
-#include "base/macros.h"
-#include "instruction_set.h"
-#include "mirror/object.h"
-#include "mirror/object_reference.h"
-#include "utils.h"
-#include "verify_object.h"
-
 #include <stdint.h>
 #include <string>
 
+#include "dex_file.h"
+#include "instruction_set.h"
+#include "mirror/object_reference.h"
+#include "throw_location.h"
+#include "utils.h"
+#include "verify_object.h"
+
 namespace art {
 
 namespace mirror {
@@ -711,8 +707,6 @@
   bool GetFPR(uint32_t reg, uintptr_t* val) const;
   bool SetFPR(uint32_t reg, uintptr_t value);
 
-  instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(uint32_t depth) const;
-
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Thread* const thread_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7827dfb..f888029 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -32,27 +32,29 @@
 
 #include "arch/context.h"
 #include "base/mutex.h"
-#include "class_linker.h"
 #include "class_linker-inl.h"
+#include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "gc_map.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc/allocator/rosalloc.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
+#include "handle_scope-inl.h"
 #include "handle_scope.h"
 #include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
+#include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
-#include "object_utils.h"
+#include "object_lock.h"
 #include "quick_exception_handler.h"
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
@@ -60,10 +62,9 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
-#include "handle_scope-inl.h"
 #include "stack.h"
-#include "thread-inl.h"
 #include "thread_list.h"
+#include "thread-inl.h"
 #include "utils.h"
 #include "verifier/dex_gc_map.h"
 #include "verify_object-inl.h"
@@ -231,47 +232,95 @@
   return stack_size;
 }
 
+// Global variable to prevent the compiler from optimizing away the page reads for the stack.
+byte dont_optimize_this;
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV when a stack
 // overflow occurs.  It is located right below the stack_end_.  Just below that
 // is the StackOverflow reserved region used when creating the StackOverflow
 // exception.
+//
+// There is a little complexity here that deserves a special mention.  When running on the
+// host (glibc), the process's main thread's stack is allocated with a special flag
+// so that memory is not committed before it is needed.  With this flag the kernel
+// only allocates stack memory as the stack grows downwards.  Because we want to put
+// an mprotected region far away from that, at the stack top, we need to make sure
+// the pages for the stack are mapped in before we call mprotect.  We do this by
+// reading every page from the stack bottom (highest address) to the stack top.
+// We then madvise those pages away.
 void Thread::InstallImplicitProtection(bool is_main_stack) {
   byte* pregion = tlsPtr_.stack_end;
+  byte* stack_lowmem = tlsPtr_.stack_begin;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
+      ~(kPageSize - 1));    // Page containing current top of stack.
 
+  const bool running_on_intel = (kRuntimeISA == kX86) || (kRuntimeISA == kX86_64);
+
+  if (running_on_intel) {
+    // On Intel, we need to map in the main stack.  This must be done by reading from the
+    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
+    // in the kernel.  Any access more than a page below the current SP will cause
+    // a segv.
+    if (is_main_stack) {
+      // First we need to unprotect the protected region because this may
+      // be called more than once for a particular stack and we will crash
+      // if we try to read the protected page.
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
+
+      // Read every page from the high address to the low.
+      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
+        dont_optimize_this = *p;
+      }
+    }
+  }
+
+  // Check and place a marker word at the lowest usable address in the stack.  This
+  // is used to prevent a double protection.
   constexpr uint32_t kMarker = 0xdadadada;
   uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
   if (*marker == kMarker) {
-    // The region has already been set up.
+    // The region has already been set up.  But on the main stack on the host we have
+    // removed the protected region in order to read the stack memory.  We need to put
+    // this back again.
+    if (is_main_stack && running_on_intel) {
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
+      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
+    }
     return;
   }
   // Add marker so that we can detect a second attempt to do this.
   *marker = kMarker;
 
-  pregion -= kStackOverflowProtectedSize;
-
-  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.  We only need to touch one byte per page.
-  if (is_main_stack) {
-    byte* start = pregion;
-    byte* end = pregion + kStackOverflowProtectedSize;
-    while (start < end) {
-      *start = static_cast<byte>(0);
-      start += kPageSize;
+  if (!running_on_intel) {
+    // On non-Intel targets, stacks are mapped cleanly.  The protected region for the
+    // main stack just needs to be mapped in.  We do this by writing one byte per page.
+    for (byte* p = pregion - kStackOverflowProtectedSize; p < pregion; p += kPageSize) {
+      *p = 0;
     }
   }
 
+  pregion -= kStackOverflowProtectedSize;
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
 
   // Tell the kernel that we won't be needing these pages any more.
+  // NB. On Linux, madvise(MADV_DONTNEED) discards the pages, so they read back as zeroes.
   if (is_main_stack) {
-    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    if (running_on_intel) {
+      // On the host, it's the whole stack (minus a page, to avoid clobbering the top of the stack).
+      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
+    } else {
+      // On Android, just the protected region.
+      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    }
   }
 }
 
@@ -532,13 +581,17 @@
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
     if (is_main_thread) {
-      // The main thread has a 16K protected region at the bottom.  We need
+      size_t guardsize;
+      pthread_attr_t attributes;
+      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+      // The main thread might have a protected region at the bottom.  We need
       // to install our own region so we need to move the limits
       // of the stack to make room for it.
-      constexpr uint32_t kDelta = 16 * KB;
-      tlsPtr_.stack_begin += kDelta;
-      tlsPtr_.stack_end += kDelta;
-      tlsPtr_.stack_size -= kDelta;
+      tlsPtr_.stack_begin += guardsize;
+      tlsPtr_.stack_end += guardsize;
+      tlsPtr_.stack_size -= guardsize;
     }
     InstallImplicitProtection(is_main_thread);
   }
@@ -1055,7 +1108,7 @@
   tls32_.state_and_flags.as_struct.state = kNative;
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
   std::fill(tlsPtr_.rosalloc_runs,
-            tlsPtr_.rosalloc_runs + gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets,
+            tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBrackets,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
   for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
     tlsPtr_.checkpoint_functions[i] = nullptr;
@@ -1878,6 +1931,8 @@
   QUICK_ENTRY_POINT_INFO(pThrowNoSuchMethod)
   QUICK_ENTRY_POINT_INFO(pThrowNullPointer)
   QUICK_ENTRY_POINT_INFO(pThrowStackOverflow)
+  QUICK_ENTRY_POINT_INFO(pA64Load)
+  QUICK_ENTRY_POINT_INFO(pA64Store)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
diff --git a/runtime/thread.h b/runtime/thread.h
index 4312741..d08c2fc 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -31,7 +31,6 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
-#include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
 #include "instruction_set.h"
@@ -47,7 +46,7 @@
 
 namespace gc {
 namespace collector {
-class SemiSpace;
+  class SemiSpace;
 }  // namespace collector
 }  // namespace gc
 
@@ -61,7 +60,6 @@
   template<class T> class PrimitiveArray;
   typedef PrimitiveArray<int32_t> IntArray;
   class StackTraceElement;
-  class StaticStorageBase;
   class Throwable;
 }  // namespace mirror
 class BaseMutex;
@@ -94,6 +92,8 @@
   kCheckpointRequest = 2  // Request that the thread do some checkpoint work and then continue.
 };
 
+static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
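+// NB. Must be kept in sync with gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets;
+// it is duplicated here so this header does not have to include rosalloc.h.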
+
 class Thread {
  public:
   // How much of the reserved bytes is reserved for incoming signals.
@@ -781,7 +781,7 @@
   void RevokeThreadLocalAllocationStack();
 
   size_t GetThreadLocalBytesAllocated() const {
-    return tlsPtr_.thread_local_pos - tlsPtr_.thread_local_start;
+    return tlsPtr_.thread_local_end - tlsPtr_.thread_local_start;
   }
 
   size_t GetThreadLocalObjectsAllocated() const {
@@ -900,7 +900,7 @@
   // first if possible.
   /***********************************************************************************************/
 
-  struct PACKED(4)  tls_32bit_sized_values {
+  struct PACKED(4) tls_32bit_sized_values {
     // We have no control over the size of 'bool', but want our boolean fields
     // to be 4-byte quantities.
     typedef uint32_t bool32_t;
@@ -1077,7 +1077,7 @@
     size_t thread_local_objects;
 
     // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
-    void* rosalloc_runs[gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets];
+    void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
 
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 54732fa..b649b62 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -39,6 +39,8 @@
 
 namespace art {
 
+static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
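+// SuspendAll() logs a warning when suspending the mutator threads takes longer than this.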
+
 ThreadList::ThreadList()
     : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
@@ -304,8 +306,8 @@
   DCHECK(self != nullptr);
 
   VLOG(threads) << *self << " SuspendAll starting...";
-
   ATRACE_BEGIN("Suspending mutator threads");
+  uint64_t start_time = NanoTime();
 
   Locks::mutator_lock_->AssertNotHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
@@ -338,6 +340,11 @@
   Locks::mutator_lock_->ExclusiveLock(self);
 #endif
 
+  uint64_t end_time = NanoTime();
+  if (end_time - start_time > kLongThreadSuspendThreshold) {
+    LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(end_time - start_time);
+  }
+
   if (kDebugLocking) {
     // Debug check that all threads are suspended.
     AssertThreadsAreSuspended(self, self);
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 292c94f..4bd44dc 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -20,6 +20,7 @@
 
 #include "atomic.h"
 #include "common_runtime_test.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index a1347a4..04abe64 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -19,7 +19,6 @@
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 1a450c4..f51b8c4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -30,7 +30,6 @@
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -115,7 +114,7 @@
 static const uint16_t kTraceRecordSizeSingleClock = 10;  // using v2
 static const uint16_t kTraceRecordSizeDualClock   = 14;  // using v3 with two timestamps
 
-ProfilerClockSource Trace::default_clock_source_ = kDefaultProfilerClockSource;
+TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;
 
 Trace* volatile Trace::the_trace_ = NULL;
 pthread_t Trace::sampling_pthread_ = 0U;
@@ -149,34 +148,34 @@
   temp_stack_trace_.reset(stack_trace);
 }
 
-void Trace::SetDefaultClockSource(ProfilerClockSource clock_source) {
+void Trace::SetDefaultClockSource(TraceClockSource clock_source) {
 #if defined(HAVE_POSIX_CLOCKS)
   default_clock_source_ = clock_source;
 #else
-  if (clock_source != kProfilerClockSourceWall) {
+  if (clock_source != kTraceClockSourceWall) {
     LOG(WARNING) << "Ignoring tracing request to use CPU time.";
   }
 #endif
 }
 
-static uint16_t GetTraceVersion(ProfilerClockSource clock_source) {
-  return (clock_source == kProfilerClockSourceDual) ? kTraceVersionDualClock
-                                                    : kTraceVersionSingleClock;
+static uint16_t GetTraceVersion(TraceClockSource clock_source) {
+  return (clock_source == kTraceClockSourceDual) ? kTraceVersionDualClock
+                                                 : kTraceVersionSingleClock;
 }
 
-static uint16_t GetRecordSize(ProfilerClockSource clock_source) {
-  return (clock_source == kProfilerClockSourceDual) ? kTraceRecordSizeDualClock
-                                                    : kTraceRecordSizeSingleClock;
+static uint16_t GetRecordSize(TraceClockSource clock_source) {
+  return (clock_source == kTraceClockSourceDual) ? kTraceRecordSizeDualClock
+                                                 : kTraceRecordSizeSingleClock;
 }
 
 bool Trace::UseThreadCpuClock() {
-  return (clock_source_ == kProfilerClockSourceThreadCpu) ||
-      (clock_source_ == kProfilerClockSourceDual);
+  return (clock_source_ == kTraceClockSourceThreadCpu) ||
+      (clock_source_ == kTraceClockSourceDual);
 }
 
 bool Trace::UseWallClock() {
-  return (clock_source_ == kProfilerClockSourceWall) ||
-      (clock_source_ == kProfilerClockSourceDual);
+  return (clock_source_ == kTraceClockSourceWall) ||
+      (clock_source_ == kTraceClockSourceDual);
 }
 
 static void MeasureClockOverhead(Trace* trace) {
@@ -462,7 +461,7 @@
   cur_offset_.StoreRelaxed(kTraceHeaderLength);
 }
 
-static void DumpBuf(uint8_t* buf, size_t buf_size, ProfilerClockSource clock_source)
+static void DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint8_t* ptr = buf + kTraceHeaderLength;
   uint8_t* end = buf + buf_size;
diff --git a/runtime/trace.h b/runtime/trace.h
index 9c8d35b..d7836b8 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -36,20 +36,9 @@
   class ArtField;
   class ArtMethod;
 }  // namespace mirror
+
 class Thread;
 
-enum ProfilerClockSource {
-  kProfilerClockSourceThreadCpu,
-  kProfilerClockSourceWall,
-  kProfilerClockSourceDual,  // Both wall and thread CPU clocks.
-};
-
-#if defined(HAVE_POSIX_CLOCKS)
-const ProfilerClockSource kDefaultProfilerClockSource = kProfilerClockSourceDual;
-#else
-const ProfilerClockSource kDefaultProfilerClockSource = kProfilerClockSourceWall;
-#endif
-
 enum TracingMode {
   kTracingInactive,
   kMethodTracingActive,
@@ -62,7 +51,7 @@
     kTraceCountAllocs = 1,
   };
 
-  static void SetDefaultClockSource(ProfilerClockSource clock_source);
+  static void SetDefaultClockSource(TraceClockSource clock_source);
 
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
                     bool direct_to_ddms, bool sampling_enabled, int interval_us)
@@ -138,7 +127,7 @@
   static Trace* volatile the_trace_ GUARDED_BY(Locks::trace_lock_);
 
   // The default profiler clock source.
-  static ProfilerClockSource default_clock_source_;
+  static TraceClockSource default_clock_source_;
 
   // Sampling thread, non-zero when sampling.
   static pthread_t sampling_pthread_;
@@ -158,7 +147,7 @@
   // True if traceview should sample instead of instrumenting method entry/exit.
   const bool sampling_enabled_;
 
-  const ProfilerClockSource clock_source_;
+  const TraceClockSource clock_source_;
 
   // Size of buf_.
   const int buffer_size_;
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index a03b389..691aec4 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -20,6 +20,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index d038571..8b1ad39 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -28,6 +28,7 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
+#include "field_helper.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -35,7 +36,6 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string.h"
-#include "object_utils.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "utf-inl.h"
@@ -281,11 +281,6 @@
   return result;
 }
 
-std::string PrettyDescriptor(Primitive::Type type) {
-  std::string descriptor_string(Primitive::Descriptor(type));
-  return PrettyDescriptor(descriptor_string);
-}
-
 std::string PrettyField(mirror::ArtField* f, bool with_type) {
   if (f == NULL) {
     return "null";
diff --git a/runtime/utils.h b/runtime/utils.h
index 448c591..c920050 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -26,7 +26,7 @@
 #include "base/logging.h"
 #include "globals.h"
 #include "instruction_set.h"
-#include "primitive.h"
+#include "base/mutex.h"
 
 #ifdef HAVE_ANDROID_OS
 #include "cutils/properties.h"
@@ -167,6 +167,9 @@
 
 // For rounding integers.
 template<typename T>
+static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) WARN_UNUSED;
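+// Declared separately so the WARN_UNUSED attribute attaches to a declaration (same pattern below).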
+
+template<typename T>
 static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) {
   return
       DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0))
@@ -174,17 +177,26 @@
 }
 
 template<typename T>
+static constexpr T RoundUp(T x, typename TypeIdentity<T>::type n) WARN_UNUSED;
+
+template<typename T>
 static constexpr T RoundUp(T x, typename TypeIdentity<T>::type n) {
   return RoundDown(x + n - 1, n);
 }
 
 // For aligning pointers.
 template<typename T>
+static inline T* AlignDown(T* x, uintptr_t n) WARN_UNUSED;
+
+template<typename T>
 static inline T* AlignDown(T* x, uintptr_t n) {
   return reinterpret_cast<T*>(RoundDown(reinterpret_cast<uintptr_t>(x), n));
 }
 
 template<typename T>
+static inline T* AlignUp(T* x, uintptr_t n) WARN_UNUSED;
+
+template<typename T>
 static inline T* AlignUp(T* x, uintptr_t n) {
   return reinterpret_cast<T*>(RoundUp(reinterpret_cast<uintptr_t>(x), n));
 }
@@ -265,7 +277,6 @@
 std::string PrettyDescriptor(mirror::String* descriptor)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyDescriptor(const std::string& descriptor);
-std::string PrettyDescriptor(Primitive::Type type);
 std::string PrettyDescriptor(mirror::Class* klass)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index eabb993..f1b5afd 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -25,10 +25,12 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex_instruction_visitor.h"
+#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "indenter.h"
 #include "intern_table.h"
 #include "leb128.h"
+#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
@@ -36,7 +38,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "register_line-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 9ac04d7..a5895e6 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -19,9 +19,10 @@
 #include <stdio.h>
 #include <memory>
 
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index e24c920..f0729e4 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -24,7 +24,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "reg_type_cache-inl.h"
 #include "scoped_thread_state_change.h"
 
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index ff9edbb..91fba4d 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -21,7 +21,6 @@
 #include "dex_file-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 1935a5b..9dc0df1 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -21,6 +21,8 @@
 #include "base/casts.h"
 #include "common_runtime_test.h"
 #include "reg_type_cache-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index 6640e0d..8e1653d 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -52,10 +52,10 @@
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
 
-void VerifyObject(mirror::Object* obj) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
+ALWAYS_INLINE void VerifyObject(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
 // Check that c.getClass() == c.getClass().getClass().
-bool VerifyClassClass(mirror::Class* c) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
+ALWAYS_INLINE bool VerifyClassClass(mirror::Class* c) NO_THREAD_SAFETY_ANALYSIS;
 
 }  // namespace art
 
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index d303d1e..96abee2 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -22,6 +22,7 @@
 #include <zlib.h>
 #include <memory>
 
+#include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
 #include "os.h"
 
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index 8e25339..d86735d 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -23,8 +23,23 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
 LOCAL_SRC_FILES := sigchain.cc
+LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_MODULE:= libsigchain
 LOCAL_SHARED_LIBRARIES := liblog libdl
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common_build.mk
 include $(BUILD_SHARED_LIBRARY)
+
+# Build host library.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+LOCAL_CLANG := $(ART_HOST_CLANG)
+LOCAL_SRC_FILES := sigchain.cc
+LOCAL_MODULE:= libsigchain
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_LDLIBS := -ldl
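+# Build both 32-bit and 64-bit host variants of the library.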
+LOCAL_MULTILIB := both
+include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 5a5805f..6f93083 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,12 +14,22 @@
  * limitations under the License.
  */
 
+#ifdef HAVE_ANDROID_OS
 #include <android/log.h>
+#else
+#include <stdarg.h>
+#include <iostream>
+#endif
+
 #include <dlfcn.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 
+#if defined(__APPLE__)
+#define _NSIG NSIG
+#endif
+
 namespace art {
 
 class SignalAction {
@@ -67,7 +77,11 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
+#ifdef HAVE_ANDROID_OS
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
+#else
+  std::cout << buf << "\n";
+#endif
   va_end(ap);
 }
 
@@ -104,10 +118,16 @@
   if ((action.sa_flags & SA_SIGINFO) == 0) {
     if (action.sa_handler != NULL) {
       action.sa_handler(sig);
+    } else {
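+      // No user handler is installed: restore the default disposition and re-raise the signal.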
+      signal(sig, SIG_DFL);
+      raise(sig);
     }
   } else {
     if (action.sa_sigaction != NULL) {
       action.sa_sigaction(sig, info, context);
+    } else {
+      signal(sig, SIG_DFL);
+      raise(sig);
     }
   }
 }
diff --git a/sigchainlib/sigchain.h b/sigchainlib/sigchain.h
index f6f2253..a4ce81c 100644
--- a/sigchainlib/sigchain.h
+++ b/sigchainlib/sigchain.h
@@ -18,10 +18,13 @@
 #define ART_SIGCHAINLIB_SIGCHAIN_H_
 
 #include <signal.h>
+
 namespace art {
 
 void ClaimSignalChain(int signal, struct sigaction* oldaction);
+
 void UnclaimSignalChain(int signal);
+
 void InvokeUserSignalHandler(int sig, siginfo_t* info, void* context);
 
 }   // namespace art
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index f412034..1c3c89e 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -34,6 +34,9 @@
     test_Math_max_F();
     test_Math_min_D();
     test_Math_max_D();
+    test_Short_reverseBytes();
+    test_Integer_reverseBytes();
+    test_Long_reverseBytes();
     test_Integer_reverse();
     test_Long_reverse();
     test_StrictMath_abs_I();
@@ -61,9 +64,6 @@
     test_Memory_pokeShort();
     test_Memory_pokeInt();
     test_Memory_pokeLong();
-    test_AtomicBoolean_compareAndSet();
-    test_AtomicInteger_compareAndSet();
-    test_AtomicLong_compareAndSet();
   }
 
   /*
@@ -96,60 +96,6 @@
     Assert.assertNotNull(Thread.currentThread());
   }
 
-  /**
-   * Will test inlining CAS, by inclusion of AtomicBoolean in core.oat.
-   */
-  public static void test_AtomicBoolean_compareAndSet() {
-    java.util.concurrent.atomic.AtomicBoolean ab = new java.util.concurrent.atomic.AtomicBoolean();
-    Assert.assertEquals(ab.compareAndSet(false, false), true);
-    Assert.assertEquals(ab.compareAndSet(true, false), false);
-    Assert.assertEquals(ab.compareAndSet(true, true), false);
-    Assert.assertEquals(ab.compareAndSet(false, true), true);
-    Assert.assertEquals(ab.compareAndSet(false, true), false);
-    Assert.assertEquals(ab.compareAndSet(false, false), false);
-    Assert.assertEquals(ab.compareAndSet(true, true), true);
-    Assert.assertEquals(ab.compareAndSet(true, false), true);
-    Assert.assertEquals(ab.compareAndSet(true, false), false);
-    Assert.assertEquals(ab.compareAndSet(true, true), false);
-    Assert.assertEquals(ab.compareAndSet(false, false), true);
-  }
-
-  /**
-   * Will test inlining CAS, by inclusion of AtomicInteger in core.oat.
-   */
-  public static void test_AtomicInteger_compareAndSet() {
-    java.util.concurrent.atomic.AtomicInteger ab = new java.util.concurrent.atomic.AtomicInteger();
-    Assert.assertEquals(ab.compareAndSet(0, 0), true);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
-    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), true);
-    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), false);
-    Assert.assertEquals(ab.compareAndSet(0, 0), false);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), true);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), true);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
-    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
-    Assert.assertEquals(ab.compareAndSet(0, 0), true);
-  }
-
-  /**
-   * Will test inlining CAS, by inclusion of AtomicLong in core.oat.
-   */
-  public static void test_AtomicLong_compareAndSet() {
-    java.util.concurrent.atomic.AtomicLong ab = new java.util.concurrent.atomic.AtomicLong();
-    Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
-    Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), true);
-    Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), false);
-    Assert.assertEquals(ab.compareAndSet(0l, 0l), false);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), true);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), true);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
-    Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
-    Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
-  }
-
   public static void test_String_length() {
     String str0 = "";
     String str1 = "x";
@@ -556,6 +502,34 @@
     Assert.assertEquals(Double.longBitsToDouble(0xfff0000000000000L), Double.NEGATIVE_INFINITY);
   }
 
+  public static void test_Short_reverseBytes() {
+      Assert.assertEquals(Short.reverseBytes((short)0x0000), (short)0x0000);
+      Assert.assertEquals(Short.reverseBytes((short)0xffff), (short)0xffff);
+      Assert.assertEquals(Short.reverseBytes((short)0x8000), (short)0x0080);
+      Assert.assertEquals(Short.reverseBytes((short)0x0080), (short)0x8000);
+      Assert.assertEquals(Short.reverseBytes((short)0x0123), (short)0x2301);
+      Assert.assertEquals(Short.reverseBytes((short)0x4567), (short)0x6745);
+      Assert.assertEquals(Short.reverseBytes((short)0x89ab), (short)0xab89);
+      Assert.assertEquals(Short.reverseBytes((short)0xcdef), (short)0xefcd);
+  }
+
+  public static void test_Integer_reverseBytes() {
+      Assert.assertEquals(Integer.reverseBytes(0x00000000), 0x00000000);
+      Assert.assertEquals(Integer.reverseBytes(0xffffffff), 0xffffffff);
+      Assert.assertEquals(Integer.reverseBytes(0x80000000), 0x00000080);
+      Assert.assertEquals(Integer.reverseBytes(0x00000080), 0x80000000);
+      Assert.assertEquals(Integer.reverseBytes(0x01234567), 0x67452301);
+      Assert.assertEquals(Integer.reverseBytes(0x89abcdef), 0xefcdab89);
+  }
+
+  public static void test_Long_reverseBytes() {
+      Assert.assertEquals(Long.reverseBytes(0x0000000000000000L), 0x0000000000000000L);
+      Assert.assertEquals(Long.reverseBytes(0xffffffffffffffffL), 0xffffffffffffffffL);
+      Assert.assertEquals(Long.reverseBytes(0x8000000000000000L), 0x0000000000000080L);
+      Assert.assertEquals(Long.reverseBytes(0x0000000000000080L), 0x8000000000000000L);
+      Assert.assertEquals(Long.reverseBytes(0x0123456789abcdefL), 0xefcdab8967452301L);
+  }
+
   public static void test_Integer_reverse() {
     Assert.assertEquals(Integer.reverse(1), 0x80000000);
     Assert.assertEquals(Integer.reverse(-1), 0xffffffff);
@@ -570,16 +544,15 @@
     Assert.assertEquals(Long.reverse(1L), 0x8000000000000000L);
     Assert.assertEquals(Long.reverse(-1L), 0xffffffffffffffffL);
     Assert.assertEquals(Long.reverse(0L), 0L);
-    // FIXME: This asserts fail with or without this patch. I have collected
-    // the expected results on my host machine.
-    // Assert.assertEquals(Long.reverse(0x1234567812345678L), 0x1e6a2c481e6a2c48L);
-    // Assert.assertEquals(Long.reverse(0x8765432187654321L), 0x84c2a6e184c2a6e1L);
-    // Assert.assertEquals(Long.reverse(Long.MAX_VALUE), 0xfffffffffffffffeL);
+    Assert.assertEquals(Long.reverse(0x1234567812345678L), 0x1e6a2c481e6a2c48L);
+    Assert.assertEquals(Long.reverse(0x8765432187654321L), 0x84c2a6e184c2a6e1L);
+    Assert.assertEquals(Long.reverse(Long.MAX_VALUE), 0xfffffffffffffffeL);
     Assert.assertEquals(Long.reverse(Long.MIN_VALUE), 1L);
   }
 
   static Object runtime;
   static Method address_of;
+  static Method new_non_movable_array;
   static Method peek_byte;
   static Method peek_short;
   static Method peek_int;
@@ -594,6 +567,7 @@
     Method get_runtime = vm_runtime.getDeclaredMethod("getRuntime");
     runtime = get_runtime.invoke(null);
     address_of = vm_runtime.getDeclaredMethod("addressOf", Object.class);
+    new_non_movable_array = vm_runtime.getDeclaredMethod("newNonMovableArray", Class.class, Integer.TYPE);
 
     Class<?> io_memory = Class.forName("libcore.io.Memory");
     peek_byte = io_memory.getDeclaredMethod("peekByte", Long.TYPE);
@@ -607,7 +581,7 @@
   }
 
   public static void test_Memory_peekByte() throws Exception {
-    byte[] b = new byte [2];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 2);
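+    // A non-movable array is required here (and in the tests below): with a moving GC an
+    // ordinary array could be relocated after addressOf, leaving the raw-memory accesses
+    // reading stale addresses.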
     b[0] = 0x12;
     b[1] = 0x11;
     long address = (long)address_of.invoke(runtime, b);
@@ -616,7 +590,7 @@
   }
 
   public static void test_Memory_peekShort() throws Exception {
-    byte[] b = new byte [3];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 3);
     b[0] = 0x13;
     b[1] = 0x12;
     b[2] = 0x11;
@@ -626,7 +600,7 @@
   }
 
   public static void test_Memory_peekInt() throws Exception {
-    byte[] b = new byte [5];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 5);
     b[0] = 0x15;
     b[1] = 0x14;
     b[2] = 0x13;
@@ -638,7 +612,7 @@
   }
 
   public static void test_Memory_peekLong() throws Exception {
-    byte[] b = new byte [9];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 9);
     b[0] = 0x19;
     b[1] = 0x18;
     b[2] = 0x17;
@@ -655,7 +629,7 @@
 
   public static void test_Memory_pokeByte() throws Exception {
     byte[] r = {0x11, 0x12};
-    byte[] b = new byte [2];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 2);
     long address = (long)address_of.invoke(runtime, b);
     poke_byte.invoke(null, address, (byte)0x11);
     poke_byte.invoke(null, address + 1, (byte)0x12);
@@ -665,7 +639,7 @@
   public static void test_Memory_pokeShort() throws Exception {
     byte[] ra = {0x12, 0x11, 0x13};
     byte[] ru = {0x12, 0x22, 0x21};
-    byte[] b = new byte [3];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 3);
     long address = (long)address_of.invoke(runtime, b);
 
     // Aligned write
@@ -681,7 +655,7 @@
   public static void test_Memory_pokeInt() throws Exception {
     byte[] ra = {0x14, 0x13, 0x12, 0x11, 0x15};
     byte[] ru = {0x14, 0x24, 0x23, 0x22, 0x21};
-    byte[] b = new byte [5];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 5);
     long address = (long)address_of.invoke(runtime, b);
 
     b[4] = 0x15;
@@ -695,7 +669,7 @@
   public static void test_Memory_pokeLong() throws Exception {
     byte[] ra = {0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x19};
     byte[] ru = {0x18, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21};
-    byte[] b = new byte [9];
+    byte[] b = (byte[])new_non_movable_array.invoke(runtime, Byte.TYPE, 9);
     long address = (long)address_of.invoke(runtime, b);
 
     b[8] = 0x19;
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 0f7527c..18bc674 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -9638,6 +9638,7 @@
     private static int ifGezThen7Else4(int i) { return (i >= 0) ? 7 : 4; }
     private static int ifGtzThen2Else9(int i) { return (i > 0) ? 2 : 9; }
     private static int ifLezThen8Else0(int i) { return (i <= 0) ? 8 : 0; }
+    private static int ifGtzThen8Else9(int i) { return (i > 0) ? 8 : 9; }
 
     private static int ifEqz(int src, int thn, int els) { return (src == 0) ? thn : els; }
     private static int ifNez(int src, int thn, int els) { return (src != 0) ? thn : els; }
@@ -9714,6 +9715,8 @@
             ifLez(-1, 116, 216), 116,
             ifLez(0, 117, 217), 117,
             ifLez(1, 118, 218), 218,
+            ifGtzThen8Else9(0), 9,
+            ifGtzThen8Else9(1), 8
         };
 
         boolean success = true;
diff --git a/test/401-optimizing-compiler/expected.txt b/test/401-optimizing-compiler/expected.txt
index 97492a4..d6ef64b 100644
--- a/test/401-optimizing-compiler/expected.txt
+++ b/test/401-optimizing-compiler/expected.txt
@@ -11,3 +11,4 @@
 Forced GC
 Forced GC
 Forced GC
+Forced GC
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index e5706a5..a5192e1 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -71,6 +71,10 @@
     if (m.$opt$TestOtherParameter(new Main()) == m) {
       throw new Error("Unexpected value returned");
     }
+
+    if (m.$opt$TestReturnNewObject(m) == m) {
+      throw new Error("Unexpected value returned");
+    }
   }
 
   static int $opt$TestInvokeIntParameter(int param) {
@@ -108,6 +112,12 @@
     return other;
   }
 
+  Object $opt$TestReturnNewObject(Object other) {
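+    // The newly allocated object must be kept live across the GC forced below.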
+    Object o = new Object();
+    forceGCStaticMethod();
+    return o;
+  }
+
   public static void $opt$TestInvokeStatic() {
     printStaticMethod();
     printStaticMethodWith2Args(1, 2);
diff --git a/test/406-fields/expected.txt b/test/406-fields/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/406-fields/expected.txt
diff --git a/test/406-fields/info.txt b/test/406-fields/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/406-fields/info.txt
diff --git a/test/406-fields/src/Main.java b/test/406-fields/src/Main.java
new file mode 100644
index 0000000..3e94e42
--- /dev/null
+++ b/test/406-fields/src/Main.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple test for field accesses.
+
+public class Main extends TestCase {
+  public static void main(String[] args) {
+    $opt$testAll();
+  }
+
+  static void $opt$testAll() {
+    AllFields fields = new AllFields();
+
+    assertEquals(false, fields.iZ);
+    assertEquals(0, fields.iB);
+    assertEquals(0, fields.iC);
+    assertEquals(0, fields.iI);
+    assertEquals(0, fields.iJ);
+    assertEquals(0, fields.iS);
+    assertNull(fields.iObject);
+
+    long longValue = -1122198787987987987L;
+    fields.iZ = true;
+    fields.iB = -2;
+    fields.iC = 'c';
+    fields.iI = 42;
+    fields.iJ = longValue;
+    fields.iS = 68;
+    fields.iObject = fields;
+
+    assertEquals(true, fields.iZ);
+    assertEquals(-2, fields.iB);
+    assertEquals('c', fields.iC);
+    assertEquals(42, fields.iI);
+    assertEquals(longValue, fields.iJ);
+    assertEquals(68, fields.iS);
+    assertEquals(fields, fields.iObject);
+  }
+
+  static class AllFields {
+    boolean iZ;
+    byte iB;
+    char iC;
+    double iD;
+    float iF;
+    int iI;
+    long iJ;
+    short iS;
+    Object iObject;
+  }
+}
diff --git a/test/406-fields/src/TestCase.java b/test/406-fields/src/TestCase.java
new file mode 100644
index 0000000..ef77f71
--- /dev/null
+++ b/test/406-fields/src/TestCase.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Common superclass for test cases.
+ */
+
+import java.util.Arrays;
+
+public abstract class TestCase {
+  public static void assertSame(Object expected, Object value) {
+    if (expected != value) {
+      throw new AssertionError("Objects are not the same: expected " +
+          String.valueOf(expected) + ", got " + String.valueOf(value));
+    }
+  }
+
+  public static void assertNotSame(Object expected, Object value) {
+    if (expected == value) {
+      throw new AssertionError(
+          "Objects are the same: " + String.valueOf(expected));
+    }
+  }
+
+  public static void assertEquals(String message, int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertTrue(String message, boolean condition) {
+    if (!condition) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertTrue(boolean condition) {
+    assertTrue("Expected true", condition);
+  }
+
+  public static void assertFalse(String message, boolean condition) {
+    if (condition) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertFalse(boolean condition) {
+    assertFalse("Expected false", condition);
+  }
+
+  public static void assertEquals(Object expected, Object actual) {
+    if (!expected.equals(actual)) {
+      String msg = "Expected \"" + expected + "\" but got \"" + actual + "\"";
+      throw new AssertionError(msg);
+    }
+  }
+
+  public static void assertNotEquals(int expected, int actual) {
+    if (expected == actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertNotEquals(Object expected, Object actual) {
+    if (expected.equals(actual)) {
+      String msg = "Objects are the same: " + String.valueOf(expected);
+      throw new AssertionError(msg);
+    }
+  }
+
+  public static <T> void assertArrayEquals(T[] actual, T... expected) {
+    assertTrue(Arrays.equals(expected, actual));
+  }
+
+  public static void assertEquals(
+      String message, Object expected, Object actual) {
+    if (!expected.equals(actual)) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(
+      String message, long expected, long actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(
+      String message, boolean expected, boolean actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(boolean expected, boolean actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(
+      String message, float expected, float actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(float expected, float actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(float expected, float actual,
+                                  float tolerance) {
+    if ((actual < expected - tolerance) || (expected + tolerance < actual)) {
+      throw new AssertionError("Expected " + expected + " got " + actual +
+          " tolerance " + tolerance);
+    }
+  }
+
+  public static void assertEquals(
+      String message, double expected, double actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertEquals(double expected, double actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(double expected, double actual,
+                                  double tolerance) {
+    if ((actual < expected - tolerance) || (expected + tolerance < actual)) {
+      throw new AssertionError("Expected " + expected + " got " + actual +
+          " tolerance " + tolerance);
+    }
+  }
+
+  public static void assertSame(
+      String message, Object expected, Object actual) {
+    if (expected != actual) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertNull(String message, Object object) {
+    if (object != null) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertNull(Object object) {
+    assertNull("Expected null", object);
+  }
+
+  public static void assertNotNull(String message, Object object) {
+    if (object == null) {
+      throw new AssertionError(message);
+    }
+  }
+
+  public static void assertNotNull(Object object) {
+    assertNotNull("Expected non-null", object);
+  }
+
+  public static void fail(String msg) {
+    throw new AssertionError(msg);
+  }
+}
diff --git a/test/701-easy-div-rem/expected.txt b/test/701-easy-div-rem/expected.txt
new file mode 100644
index 0000000..97be343
--- /dev/null
+++ b/test/701-easy-div-rem/expected.txt
@@ -0,0 +1,8 @@
+Begin
+Int: checking some equally spaced dividends...
+Int: checking small dividends...
+Int: checking big dividends...
+Long: checking some equally spaced dividends...
+Long: checking small dividends...
+Long: checking big dividends...
+End
diff --git a/test/701-easy-div-rem/genMain.py b/test/701-easy-div-rem/genMain.py
new file mode 100644
index 0000000..80eac34
--- /dev/null
+++ b/test/701-easy-div-rem/genMain.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
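+# Each entry pairs template substitutions with (checker kind, generated function name
+# prefix, divisor values) tuples; the script writes the generated code to src/Main.java.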
+all_tests = [
+    ({'@INT@': 'int', '@SUFFIX@':''},
+     [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(31)]),
+      ('CheckDiv', 'idiv_by_small_', [i for i in range(3, 16) if i not in (4, 8)]),
+      ('CheckRem', 'irem_by_pow2_', [2**i for i in range(31)])]),
+    ({'@INT@': 'long', '@SUFFIX@': 'l'},
+     [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(63)]),
+      ('CheckDiv', 'ldiv_by_small_', [i for i in range(3, 16) if i not in (4, 8)]),
+      ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(63)])])
+]
+
+def subst_vars(variables, text):
+    '''Substitute variables in text.'''
+    for key, value in variables.iteritems():
+        text = text.replace(str(key), str(value))
+    return text
+
+# Generate all the function bodies (in decls) and all the function calls (in calls).
+decls, calls = '', {}
+for default_vars, tests in all_tests:
+    local_vars = default_vars.copy()
+    int_type = local_vars['@INT@']
+    for checker, name, values in tests:
+        local_vars['@CHECKER@'] = checker
+        for i, value in enumerate(values):
+            local_vars['@NAME@'] = name + str(i)
+            local_vars['@VALUE@'] = value
+            local_vars['@OP@'] = '/' if 'div' in name else '%'
+
+            # Function body.
+            decls += subst_vars(local_vars, '''
+    public static @INT@ @NAME@(@INT@ x) {return x @OP@ @VALUE@@SUFFIX@;}''')
+
+            # Function call and test.
+            calls[int_type] = calls.get(int_type, '') + subst_vars(local_vars, '''
+        @INT@@CHECKER@("@NAME@", @NAME@(x), x, @VALUE@@SUFFIX@);''')
+
+# Generate the checkers.
+checkers = ''
+local_vars = {}
+for int_type in ('int', 'long'):
+    local_vars['@INT@'] = int_type
+    for op, op_name in (('/', 'Div'), ('%', 'Rem')):
+        local_vars['@OP@'] = op
+        local_vars['@OP_NAME@'] = op_name
+        checkers += subst_vars(local_vars, '''
+    public static void @INT@Check@OP_NAME@(String desc, @INT@ result, @INT@ dividend, @INT@ divisor) {
+        @INT@ correct_result = dividend @OP@ divisor;
+        if (result != correct_result) {
+            reportError(desc + "(" + dividend + ") == " + result +
+                        " should be " + correct_result);
+        }
+    }''')
+
+
+code = \
+'''/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static int num_errors = 0;
+
+    public static void reportError(String message) {
+        if (num_errors == 10) {
+            System.out.println("Omitting other error messages...");
+        } else if (num_errors < 10) {
+            System.out.println(message);
+        }
+        num_errors += 1;
+    }
+%s
+%s
+
+    public static void intCheckAll(int x) {%s
+    }
+
+    public static void longCheckAll(long x) {%s
+    }
+
+    public static void main(String[] args) {
+      int i;
+      long l;
+
+      System.out.println("Begin");
+
+      System.out.println("Int: checking some equally spaced dividends...");
+      for (i = -1000; i < 1000; i += 300) {
+          intCheckAll(i);
+          intCheckAll(-i);
+      }
+
+      System.out.println("Int: checking small dividends...");
+      for (i = 1; i < 100; i += 1) {
+          intCheckAll(i);
+          intCheckAll(-i);
+      }
+
+      System.out.println("Int: checking big dividends...");
+      for (i = 0; i < 100; i += 1) {
+          intCheckAll(Integer.MAX_VALUE - i);
+          intCheckAll(Integer.MIN_VALUE + i);
+      }
+
+      System.out.println("Long: checking some equally spaced dividends...");
+      for (l = 0l; l < 1000000000000l; l += 300000000000l) {
+          longCheckAll(l);
+          longCheckAll(-l);
+      }
+
+      System.out.println("Long: checking small dividends...");
+      for (l = 1l; l < 100l; l += 1l) {
+          longCheckAll(l);
+          longCheckAll(-l);
+      }
+
+      System.out.println("Long: checking big dividends...");
+      for (l = 0l; l < 100l; l += 1l) {
+          longCheckAll(Long.MAX_VALUE - l);
+          longCheckAll(Long.MIN_VALUE + l);
+      }
+
+      System.out.println("End");
+    }
+}
+''' % (checkers, decls, calls['int'], calls['long'])
+
+with open('src/Main.java', 'w') as f:
+    f.write(code)
diff --git a/test/701-easy-div-rem/info.txt b/test/701-easy-div-rem/info.txt
new file mode 100644
index 0000000..56d1786
--- /dev/null
+++ b/test/701-easy-div-rem/info.txt
@@ -0,0 +1 @@
+Simple tests for checking easy division/remainder for ints and longs.
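
These "easy" divisors are exactly the cases a compiler strength-reduces. For a power-of-two divisor the usual scheme replaces the division with an arithmetic shift plus a bias that restores Java's round-toward-zero semantics for negative dividends; a minimal Python sketch of the general technique (not necessarily ART's exact codegen):

    # Java-style truncated division by 2**k without a divide instruction.
    # Python's >> is an arithmetic (flooring) shift, so negative dividends
    # need a bias of 2**k - 1 first -- the same fix-up compilers emit.
    def div_pow2(x, k):
        if x < 0:
            x += (1 << k) - 1
        return x >> k

    assert div_pow2(-7, 1) == -3  # Java: -7 / 2 == -3, not floor(-3.5) == -4
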
diff --git a/test/701-easy-div-rem/src/Main.java b/test/701-easy-div-rem/src/Main.java
new file mode 100644
index 0000000..f995f61
--- /dev/null
+++ b/test/701-easy-div-rem/src/Main.java
@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static int num_errors = 0;
+
+    public static void reportError(String message) {
+        if (num_errors == 10) {
+            System.out.println("Omitting other error messages...");
+        } else if (num_errors < 10) {
+            System.out.println(message);
+        }
+        num_errors += 1;
+    }
+
+    public static void intCheckDiv(String desc, int result, int dividend, int divisor) {
+        int correct_result = dividend / divisor;
+        if (result != correct_result) {
+            reportError(desc + "(" + dividend + ") == " + result +
+                        " should be " + correct_result);
+        }
+    }
+    public static void intCheckRem(String desc, int result, int dividend, int divisor) {
+        int correct_result = dividend % divisor;
+        if (result != correct_result) {
+            reportError(desc + "(" + dividend + ") == " + result +
+                        " should be " + correct_result);
+        }
+    }
+    public static void longCheckDiv(String desc, long result, long dividend, long divisor) {
+        long correct_result = dividend / divisor;
+        if (result != correct_result) {
+            reportError(desc + "(" + dividend + ") == " + result +
+                        " should be " + correct_result);
+        }
+    }
+    public static void longCheckRem(String desc, long result, long dividend, long divisor) {
+        long correct_result = dividend % divisor;
+        if (result != correct_result) {
+            reportError(desc + "(" + dividend + ") == " + result +
+                        " should be " + correct_result);
+        }
+    }
+
+    public static int idiv_by_pow2_0(int x) {return x / 1;}
+    public static int idiv_by_pow2_1(int x) {return x / 2;}
+    public static int idiv_by_pow2_2(int x) {return x / 4;}
+    public static int idiv_by_pow2_3(int x) {return x / 8;}
+    public static int idiv_by_pow2_4(int x) {return x / 16;}
+    public static int idiv_by_pow2_5(int x) {return x / 32;}
+    public static int idiv_by_pow2_6(int x) {return x / 64;}
+    public static int idiv_by_pow2_7(int x) {return x / 128;}
+    public static int idiv_by_pow2_8(int x) {return x / 256;}
+    public static int idiv_by_pow2_9(int x) {return x / 512;}
+    public static int idiv_by_pow2_10(int x) {return x / 1024;}
+    public static int idiv_by_pow2_11(int x) {return x / 2048;}
+    public static int idiv_by_pow2_12(int x) {return x / 4096;}
+    public static int idiv_by_pow2_13(int x) {return x / 8192;}
+    public static int idiv_by_pow2_14(int x) {return x / 16384;}
+    public static int idiv_by_pow2_15(int x) {return x / 32768;}
+    public static int idiv_by_pow2_16(int x) {return x / 65536;}
+    public static int idiv_by_pow2_17(int x) {return x / 131072;}
+    public static int idiv_by_pow2_18(int x) {return x / 262144;}
+    public static int idiv_by_pow2_19(int x) {return x / 524288;}
+    public static int idiv_by_pow2_20(int x) {return x / 1048576;}
+    public static int idiv_by_pow2_21(int x) {return x / 2097152;}
+    public static int idiv_by_pow2_22(int x) {return x / 4194304;}
+    public static int idiv_by_pow2_23(int x) {return x / 8388608;}
+    public static int idiv_by_pow2_24(int x) {return x / 16777216;}
+    public static int idiv_by_pow2_25(int x) {return x / 33554432;}
+    public static int idiv_by_pow2_26(int x) {return x / 67108864;}
+    public static int idiv_by_pow2_27(int x) {return x / 134217728;}
+    public static int idiv_by_pow2_28(int x) {return x / 268435456;}
+    public static int idiv_by_pow2_29(int x) {return x / 536870912;}
+    public static int idiv_by_pow2_30(int x) {return x / 1073741824;}
+    public static int idiv_by_small_0(int x) {return x / 3;}
+    public static int idiv_by_small_1(int x) {return x / 5;}
+    public static int idiv_by_small_2(int x) {return x / 6;}
+    public static int idiv_by_small_3(int x) {return x / 7;}
+    public static int idiv_by_small_4(int x) {return x / 9;}
+    public static int idiv_by_small_5(int x) {return x / 10;}
+    public static int idiv_by_small_6(int x) {return x / 11;}
+    public static int idiv_by_small_7(int x) {return x / 12;}
+    public static int idiv_by_small_8(int x) {return x / 13;}
+    public static int idiv_by_small_9(int x) {return x / 14;}
+    public static int idiv_by_small_10(int x) {return x / 15;}
+    public static int irem_by_pow2_0(int x) {return x % 1;}
+    public static int irem_by_pow2_1(int x) {return x % 2;}
+    public static int irem_by_pow2_2(int x) {return x % 4;}
+    public static int irem_by_pow2_3(int x) {return x % 8;}
+    public static int irem_by_pow2_4(int x) {return x % 16;}
+    public static int irem_by_pow2_5(int x) {return x % 32;}
+    public static int irem_by_pow2_6(int x) {return x % 64;}
+    public static int irem_by_pow2_7(int x) {return x % 128;}
+    public static int irem_by_pow2_8(int x) {return x % 256;}
+    public static int irem_by_pow2_9(int x) {return x % 512;}
+    public static int irem_by_pow2_10(int x) {return x % 1024;}
+    public static int irem_by_pow2_11(int x) {return x % 2048;}
+    public static int irem_by_pow2_12(int x) {return x % 4096;}
+    public static int irem_by_pow2_13(int x) {return x % 8192;}
+    public static int irem_by_pow2_14(int x) {return x % 16384;}
+    public static int irem_by_pow2_15(int x) {return x % 32768;}
+    public static int irem_by_pow2_16(int x) {return x % 65536;}
+    public static int irem_by_pow2_17(int x) {return x % 131072;}
+    public static int irem_by_pow2_18(int x) {return x % 262144;}
+    public static int irem_by_pow2_19(int x) {return x % 524288;}
+    public static int irem_by_pow2_20(int x) {return x % 1048576;}
+    public static int irem_by_pow2_21(int x) {return x % 2097152;}
+    public static int irem_by_pow2_22(int x) {return x % 4194304;}
+    public static int irem_by_pow2_23(int x) {return x % 8388608;}
+    public static int irem_by_pow2_24(int x) {return x % 16777216;}
+    public static int irem_by_pow2_25(int x) {return x % 33554432;}
+    public static int irem_by_pow2_26(int x) {return x % 67108864;}
+    public static int irem_by_pow2_27(int x) {return x % 134217728;}
+    public static int irem_by_pow2_28(int x) {return x % 268435456;}
+    public static int irem_by_pow2_29(int x) {return x % 536870912;}
+    public static int irem_by_pow2_30(int x) {return x % 1073741824;}
+    public static long ldiv_by_pow2_0(long x) {return x / 1l;}
+    public static long ldiv_by_pow2_1(long x) {return x / 2l;}
+    public static long ldiv_by_pow2_2(long x) {return x / 4l;}
+    public static long ldiv_by_pow2_3(long x) {return x / 8l;}
+    public static long ldiv_by_pow2_4(long x) {return x / 16l;}
+    public static long ldiv_by_pow2_5(long x) {return x / 32l;}
+    public static long ldiv_by_pow2_6(long x) {return x / 64l;}
+    public static long ldiv_by_pow2_7(long x) {return x / 128l;}
+    public static long ldiv_by_pow2_8(long x) {return x / 256l;}
+    public static long ldiv_by_pow2_9(long x) {return x / 512l;}
+    public static long ldiv_by_pow2_10(long x) {return x / 1024l;}
+    public static long ldiv_by_pow2_11(long x) {return x / 2048l;}
+    public static long ldiv_by_pow2_12(long x) {return x / 4096l;}
+    public static long ldiv_by_pow2_13(long x) {return x / 8192l;}
+    public static long ldiv_by_pow2_14(long x) {return x / 16384l;}
+    public static long ldiv_by_pow2_15(long x) {return x / 32768l;}
+    public static long ldiv_by_pow2_16(long x) {return x / 65536l;}
+    public static long ldiv_by_pow2_17(long x) {return x / 131072l;}
+    public static long ldiv_by_pow2_18(long x) {return x / 262144l;}
+    public static long ldiv_by_pow2_19(long x) {return x / 524288l;}
+    public static long ldiv_by_pow2_20(long x) {return x / 1048576l;}
+    public static long ldiv_by_pow2_21(long x) {return x / 2097152l;}
+    public static long ldiv_by_pow2_22(long x) {return x / 4194304l;}
+    public static long ldiv_by_pow2_23(long x) {return x / 8388608l;}
+    public static long ldiv_by_pow2_24(long x) {return x / 16777216l;}
+    public static long ldiv_by_pow2_25(long x) {return x / 33554432l;}
+    public static long ldiv_by_pow2_26(long x) {return x / 67108864l;}
+    public static long ldiv_by_pow2_27(long x) {return x / 134217728l;}
+    public static long ldiv_by_pow2_28(long x) {return x / 268435456l;}
+    public static long ldiv_by_pow2_29(long x) {return x / 536870912l;}
+    public static long ldiv_by_pow2_30(long x) {return x / 1073741824l;}
+    public static long ldiv_by_pow2_31(long x) {return x / 2147483648l;}
+    public static long ldiv_by_pow2_32(long x) {return x / 4294967296l;}
+    public static long ldiv_by_pow2_33(long x) {return x / 8589934592l;}
+    public static long ldiv_by_pow2_34(long x) {return x / 17179869184l;}
+    public static long ldiv_by_pow2_35(long x) {return x / 34359738368l;}
+    public static long ldiv_by_pow2_36(long x) {return x / 68719476736l;}
+    public static long ldiv_by_pow2_37(long x) {return x / 137438953472l;}
+    public static long ldiv_by_pow2_38(long x) {return x / 274877906944l;}
+    public static long ldiv_by_pow2_39(long x) {return x / 549755813888l;}
+    public static long ldiv_by_pow2_40(long x) {return x / 1099511627776l;}
+    public static long ldiv_by_pow2_41(long x) {return x / 2199023255552l;}
+    public static long ldiv_by_pow2_42(long x) {return x / 4398046511104l;}
+    public static long ldiv_by_pow2_43(long x) {return x / 8796093022208l;}
+    public static long ldiv_by_pow2_44(long x) {return x / 17592186044416l;}
+    public static long ldiv_by_pow2_45(long x) {return x / 35184372088832l;}
+    public static long ldiv_by_pow2_46(long x) {return x / 70368744177664l;}
+    public static long ldiv_by_pow2_47(long x) {return x / 140737488355328l;}
+    public static long ldiv_by_pow2_48(long x) {return x / 281474976710656l;}
+    public static long ldiv_by_pow2_49(long x) {return x / 562949953421312l;}
+    public static long ldiv_by_pow2_50(long x) {return x / 1125899906842624l;}
+    public static long ldiv_by_pow2_51(long x) {return x / 2251799813685248l;}
+    public static long ldiv_by_pow2_52(long x) {return x / 4503599627370496l;}
+    public static long ldiv_by_pow2_53(long x) {return x / 9007199254740992l;}
+    public static long ldiv_by_pow2_54(long x) {return x / 18014398509481984l;}
+    public static long ldiv_by_pow2_55(long x) {return x / 36028797018963968l;}
+    public static long ldiv_by_pow2_56(long x) {return x / 72057594037927936l;}
+    public static long ldiv_by_pow2_57(long x) {return x / 144115188075855872l;}
+    public static long ldiv_by_pow2_58(long x) {return x / 288230376151711744l;}
+    public static long ldiv_by_pow2_59(long x) {return x / 576460752303423488l;}
+    public static long ldiv_by_pow2_60(long x) {return x / 1152921504606846976l;}
+    public static long ldiv_by_pow2_61(long x) {return x / 2305843009213693952l;}
+    public static long ldiv_by_pow2_62(long x) {return x / 4611686018427387904l;}
+    public static long ldiv_by_small_0(long x) {return x / 3l;}
+    public static long ldiv_by_small_1(long x) {return x / 5l;}
+    public static long ldiv_by_small_2(long x) {return x / 6l;}
+    public static long ldiv_by_small_3(long x) {return x / 7l;}
+    public static long ldiv_by_small_4(long x) {return x / 9l;}
+    public static long ldiv_by_small_5(long x) {return x / 10l;}
+    public static long ldiv_by_small_6(long x) {return x / 11l;}
+    public static long ldiv_by_small_7(long x) {return x / 12l;}
+    public static long ldiv_by_small_8(long x) {return x / 13l;}
+    public static long ldiv_by_small_9(long x) {return x / 14l;}
+    public static long ldiv_by_small_10(long x) {return x / 15l;}
+    public static long lrem_by_pow2_0(long x) {return x % 1l;}
+    public static long lrem_by_pow2_1(long x) {return x % 2l;}
+    public static long lrem_by_pow2_2(long x) {return x % 4l;}
+    public static long lrem_by_pow2_3(long x) {return x % 8l;}
+    public static long lrem_by_pow2_4(long x) {return x % 16l;}
+    public static long lrem_by_pow2_5(long x) {return x % 32l;}
+    public static long lrem_by_pow2_6(long x) {return x % 64l;}
+    public static long lrem_by_pow2_7(long x) {return x % 128l;}
+    public static long lrem_by_pow2_8(long x) {return x % 256l;}
+    public static long lrem_by_pow2_9(long x) {return x % 512l;}
+    public static long lrem_by_pow2_10(long x) {return x % 1024l;}
+    public static long lrem_by_pow2_11(long x) {return x % 2048l;}
+    public static long lrem_by_pow2_12(long x) {return x % 4096l;}
+    public static long lrem_by_pow2_13(long x) {return x % 8192l;}
+    public static long lrem_by_pow2_14(long x) {return x % 16384l;}
+    public static long lrem_by_pow2_15(long x) {return x % 32768l;}
+    public static long lrem_by_pow2_16(long x) {return x % 65536l;}
+    public static long lrem_by_pow2_17(long x) {return x % 131072l;}
+    public static long lrem_by_pow2_18(long x) {return x % 262144l;}
+    public static long lrem_by_pow2_19(long x) {return x % 524288l;}
+    public static long lrem_by_pow2_20(long x) {return x % 1048576l;}
+    public static long lrem_by_pow2_21(long x) {return x % 2097152l;}
+    public static long lrem_by_pow2_22(long x) {return x % 4194304l;}
+    public static long lrem_by_pow2_23(long x) {return x % 8388608l;}
+    public static long lrem_by_pow2_24(long x) {return x % 16777216l;}
+    public static long lrem_by_pow2_25(long x) {return x % 33554432l;}
+    public static long lrem_by_pow2_26(long x) {return x % 67108864l;}
+    public static long lrem_by_pow2_27(long x) {return x % 134217728l;}
+    public static long lrem_by_pow2_28(long x) {return x % 268435456l;}
+    public static long lrem_by_pow2_29(long x) {return x % 536870912l;}
+    public static long lrem_by_pow2_30(long x) {return x % 1073741824l;}
+    public static long lrem_by_pow2_31(long x) {return x % 2147483648l;}
+    public static long lrem_by_pow2_32(long x) {return x % 4294967296l;}
+    public static long lrem_by_pow2_33(long x) {return x % 8589934592l;}
+    public static long lrem_by_pow2_34(long x) {return x % 17179869184l;}
+    public static long lrem_by_pow2_35(long x) {return x % 34359738368l;}
+    public static long lrem_by_pow2_36(long x) {return x % 68719476736l;}
+    public static long lrem_by_pow2_37(long x) {return x % 137438953472l;}
+    public static long lrem_by_pow2_38(long x) {return x % 274877906944l;}
+    public static long lrem_by_pow2_39(long x) {return x % 549755813888l;}
+    public static long lrem_by_pow2_40(long x) {return x % 1099511627776l;}
+    public static long lrem_by_pow2_41(long x) {return x % 2199023255552l;}
+    public static long lrem_by_pow2_42(long x) {return x % 4398046511104l;}
+    public static long lrem_by_pow2_43(long x) {return x % 8796093022208l;}
+    public static long lrem_by_pow2_44(long x) {return x % 17592186044416l;}
+    public static long lrem_by_pow2_45(long x) {return x % 35184372088832l;}
+    public static long lrem_by_pow2_46(long x) {return x % 70368744177664l;}
+    public static long lrem_by_pow2_47(long x) {return x % 140737488355328l;}
+    public static long lrem_by_pow2_48(long x) {return x % 281474976710656l;}
+    public static long lrem_by_pow2_49(long x) {return x % 562949953421312l;}
+    public static long lrem_by_pow2_50(long x) {return x % 1125899906842624l;}
+    public static long lrem_by_pow2_51(long x) {return x % 2251799813685248l;}
+    public static long lrem_by_pow2_52(long x) {return x % 4503599627370496l;}
+    public static long lrem_by_pow2_53(long x) {return x % 9007199254740992l;}
+    public static long lrem_by_pow2_54(long x) {return x % 18014398509481984l;}
+    public static long lrem_by_pow2_55(long x) {return x % 36028797018963968l;}
+    public static long lrem_by_pow2_56(long x) {return x % 72057594037927936l;}
+    public static long lrem_by_pow2_57(long x) {return x % 144115188075855872l;}
+    public static long lrem_by_pow2_58(long x) {return x % 288230376151711744l;}
+    public static long lrem_by_pow2_59(long x) {return x % 576460752303423488l;}
+    public static long lrem_by_pow2_60(long x) {return x % 1152921504606846976l;}
+    public static long lrem_by_pow2_61(long x) {return x % 2305843009213693952l;}
+    public static long lrem_by_pow2_62(long x) {return x % 4611686018427387904l;}
+
+    public static void intCheckAll(int x) {
+        intCheckDiv("idiv_by_pow2_0", idiv_by_pow2_0(x), x, 1);
+        intCheckDiv("idiv_by_pow2_1", idiv_by_pow2_1(x), x, 2);
+        intCheckDiv("idiv_by_pow2_2", idiv_by_pow2_2(x), x, 4);
+        intCheckDiv("idiv_by_pow2_3", idiv_by_pow2_3(x), x, 8);
+        intCheckDiv("idiv_by_pow2_4", idiv_by_pow2_4(x), x, 16);
+        intCheckDiv("idiv_by_pow2_5", idiv_by_pow2_5(x), x, 32);
+        intCheckDiv("idiv_by_pow2_6", idiv_by_pow2_6(x), x, 64);
+        intCheckDiv("idiv_by_pow2_7", idiv_by_pow2_7(x), x, 128);
+        intCheckDiv("idiv_by_pow2_8", idiv_by_pow2_8(x), x, 256);
+        intCheckDiv("idiv_by_pow2_9", idiv_by_pow2_9(x), x, 512);
+        intCheckDiv("idiv_by_pow2_10", idiv_by_pow2_10(x), x, 1024);
+        intCheckDiv("idiv_by_pow2_11", idiv_by_pow2_11(x), x, 2048);
+        intCheckDiv("idiv_by_pow2_12", idiv_by_pow2_12(x), x, 4096);
+        intCheckDiv("idiv_by_pow2_13", idiv_by_pow2_13(x), x, 8192);
+        intCheckDiv("idiv_by_pow2_14", idiv_by_pow2_14(x), x, 16384);
+        intCheckDiv("idiv_by_pow2_15", idiv_by_pow2_15(x), x, 32768);
+        intCheckDiv("idiv_by_pow2_16", idiv_by_pow2_16(x), x, 65536);
+        intCheckDiv("idiv_by_pow2_17", idiv_by_pow2_17(x), x, 131072);
+        intCheckDiv("idiv_by_pow2_18", idiv_by_pow2_18(x), x, 262144);
+        intCheckDiv("idiv_by_pow2_19", idiv_by_pow2_19(x), x, 524288);
+        intCheckDiv("idiv_by_pow2_20", idiv_by_pow2_20(x), x, 1048576);
+        intCheckDiv("idiv_by_pow2_21", idiv_by_pow2_21(x), x, 2097152);
+        intCheckDiv("idiv_by_pow2_22", idiv_by_pow2_22(x), x, 4194304);
+        intCheckDiv("idiv_by_pow2_23", idiv_by_pow2_23(x), x, 8388608);
+        intCheckDiv("idiv_by_pow2_24", idiv_by_pow2_24(x), x, 16777216);
+        intCheckDiv("idiv_by_pow2_25", idiv_by_pow2_25(x), x, 33554432);
+        intCheckDiv("idiv_by_pow2_26", idiv_by_pow2_26(x), x, 67108864);
+        intCheckDiv("idiv_by_pow2_27", idiv_by_pow2_27(x), x, 134217728);
+        intCheckDiv("idiv_by_pow2_28", idiv_by_pow2_28(x), x, 268435456);
+        intCheckDiv("idiv_by_pow2_29", idiv_by_pow2_29(x), x, 536870912);
+        intCheckDiv("idiv_by_pow2_30", idiv_by_pow2_30(x), x, 1073741824);
+        intCheckDiv("idiv_by_small_0", idiv_by_small_0(x), x, 3);
+        intCheckDiv("idiv_by_small_1", idiv_by_small_1(x), x, 5);
+        intCheckDiv("idiv_by_small_2", idiv_by_small_2(x), x, 6);
+        intCheckDiv("idiv_by_small_3", idiv_by_small_3(x), x, 7);
+        intCheckDiv("idiv_by_small_4", idiv_by_small_4(x), x, 9);
+        intCheckDiv("idiv_by_small_5", idiv_by_small_5(x), x, 10);
+        intCheckDiv("idiv_by_small_6", idiv_by_small_6(x), x, 11);
+        intCheckDiv("idiv_by_small_7", idiv_by_small_7(x), x, 12);
+        intCheckDiv("idiv_by_small_8", idiv_by_small_8(x), x, 13);
+        intCheckDiv("idiv_by_small_9", idiv_by_small_9(x), x, 14);
+        intCheckDiv("idiv_by_small_10", idiv_by_small_10(x), x, 15);
+        intCheckRem("irem_by_pow2_0", irem_by_pow2_0(x), x, 1);
+        intCheckRem("irem_by_pow2_1", irem_by_pow2_1(x), x, 2);
+        intCheckRem("irem_by_pow2_2", irem_by_pow2_2(x), x, 4);
+        intCheckRem("irem_by_pow2_3", irem_by_pow2_3(x), x, 8);
+        intCheckRem("irem_by_pow2_4", irem_by_pow2_4(x), x, 16);
+        intCheckRem("irem_by_pow2_5", irem_by_pow2_5(x), x, 32);
+        intCheckRem("irem_by_pow2_6", irem_by_pow2_6(x), x, 64);
+        intCheckRem("irem_by_pow2_7", irem_by_pow2_7(x), x, 128);
+        intCheckRem("irem_by_pow2_8", irem_by_pow2_8(x), x, 256);
+        intCheckRem("irem_by_pow2_9", irem_by_pow2_9(x), x, 512);
+        intCheckRem("irem_by_pow2_10", irem_by_pow2_10(x), x, 1024);
+        intCheckRem("irem_by_pow2_11", irem_by_pow2_11(x), x, 2048);
+        intCheckRem("irem_by_pow2_12", irem_by_pow2_12(x), x, 4096);
+        intCheckRem("irem_by_pow2_13", irem_by_pow2_13(x), x, 8192);
+        intCheckRem("irem_by_pow2_14", irem_by_pow2_14(x), x, 16384);
+        intCheckRem("irem_by_pow2_15", irem_by_pow2_15(x), x, 32768);
+        intCheckRem("irem_by_pow2_16", irem_by_pow2_16(x), x, 65536);
+        intCheckRem("irem_by_pow2_17", irem_by_pow2_17(x), x, 131072);
+        intCheckRem("irem_by_pow2_18", irem_by_pow2_18(x), x, 262144);
+        intCheckRem("irem_by_pow2_19", irem_by_pow2_19(x), x, 524288);
+        intCheckRem("irem_by_pow2_20", irem_by_pow2_20(x), x, 1048576);
+        intCheckRem("irem_by_pow2_21", irem_by_pow2_21(x), x, 2097152);
+        intCheckRem("irem_by_pow2_22", irem_by_pow2_22(x), x, 4194304);
+        intCheckRem("irem_by_pow2_23", irem_by_pow2_23(x), x, 8388608);
+        intCheckRem("irem_by_pow2_24", irem_by_pow2_24(x), x, 16777216);
+        intCheckRem("irem_by_pow2_25", irem_by_pow2_25(x), x, 33554432);
+        intCheckRem("irem_by_pow2_26", irem_by_pow2_26(x), x, 67108864);
+        intCheckRem("irem_by_pow2_27", irem_by_pow2_27(x), x, 134217728);
+        intCheckRem("irem_by_pow2_28", irem_by_pow2_28(x), x, 268435456);
+        intCheckRem("irem_by_pow2_29", irem_by_pow2_29(x), x, 536870912);
+        intCheckRem("irem_by_pow2_30", irem_by_pow2_30(x), x, 1073741824);
+    }
+
+    public static void longCheckAll(long x) {
+        longCheckDiv("ldiv_by_pow2_0", ldiv_by_pow2_0(x), x, 1l);
+        longCheckDiv("ldiv_by_pow2_1", ldiv_by_pow2_1(x), x, 2l);
+        longCheckDiv("ldiv_by_pow2_2", ldiv_by_pow2_2(x), x, 4l);
+        longCheckDiv("ldiv_by_pow2_3", ldiv_by_pow2_3(x), x, 8l);
+        longCheckDiv("ldiv_by_pow2_4", ldiv_by_pow2_4(x), x, 16l);
+        longCheckDiv("ldiv_by_pow2_5", ldiv_by_pow2_5(x), x, 32l);
+        longCheckDiv("ldiv_by_pow2_6", ldiv_by_pow2_6(x), x, 64l);
+        longCheckDiv("ldiv_by_pow2_7", ldiv_by_pow2_7(x), x, 128l);
+        longCheckDiv("ldiv_by_pow2_8", ldiv_by_pow2_8(x), x, 256l);
+        longCheckDiv("ldiv_by_pow2_9", ldiv_by_pow2_9(x), x, 512l);
+        longCheckDiv("ldiv_by_pow2_10", ldiv_by_pow2_10(x), x, 1024l);
+        longCheckDiv("ldiv_by_pow2_11", ldiv_by_pow2_11(x), x, 2048l);
+        longCheckDiv("ldiv_by_pow2_12", ldiv_by_pow2_12(x), x, 4096l);
+        longCheckDiv("ldiv_by_pow2_13", ldiv_by_pow2_13(x), x, 8192l);
+        longCheckDiv("ldiv_by_pow2_14", ldiv_by_pow2_14(x), x, 16384l);
+        longCheckDiv("ldiv_by_pow2_15", ldiv_by_pow2_15(x), x, 32768l);
+        longCheckDiv("ldiv_by_pow2_16", ldiv_by_pow2_16(x), x, 65536l);
+        longCheckDiv("ldiv_by_pow2_17", ldiv_by_pow2_17(x), x, 131072l);
+        longCheckDiv("ldiv_by_pow2_18", ldiv_by_pow2_18(x), x, 262144l);
+        longCheckDiv("ldiv_by_pow2_19", ldiv_by_pow2_19(x), x, 524288l);
+        longCheckDiv("ldiv_by_pow2_20", ldiv_by_pow2_20(x), x, 1048576l);
+        longCheckDiv("ldiv_by_pow2_21", ldiv_by_pow2_21(x), x, 2097152l);
+        longCheckDiv("ldiv_by_pow2_22", ldiv_by_pow2_22(x), x, 4194304l);
+        longCheckDiv("ldiv_by_pow2_23", ldiv_by_pow2_23(x), x, 8388608l);
+        longCheckDiv("ldiv_by_pow2_24", ldiv_by_pow2_24(x), x, 16777216l);
+        longCheckDiv("ldiv_by_pow2_25", ldiv_by_pow2_25(x), x, 33554432l);
+        longCheckDiv("ldiv_by_pow2_26", ldiv_by_pow2_26(x), x, 67108864l);
+        longCheckDiv("ldiv_by_pow2_27", ldiv_by_pow2_27(x), x, 134217728l);
+        longCheckDiv("ldiv_by_pow2_28", ldiv_by_pow2_28(x), x, 268435456l);
+        longCheckDiv("ldiv_by_pow2_29", ldiv_by_pow2_29(x), x, 536870912l);
+        longCheckDiv("ldiv_by_pow2_30", ldiv_by_pow2_30(x), x, 1073741824l);
+        longCheckDiv("ldiv_by_pow2_31", ldiv_by_pow2_31(x), x, 2147483648l);
+        longCheckDiv("ldiv_by_pow2_32", ldiv_by_pow2_32(x), x, 4294967296l);
+        longCheckDiv("ldiv_by_pow2_33", ldiv_by_pow2_33(x), x, 8589934592l);
+        longCheckDiv("ldiv_by_pow2_34", ldiv_by_pow2_34(x), x, 17179869184l);
+        longCheckDiv("ldiv_by_pow2_35", ldiv_by_pow2_35(x), x, 34359738368l);
+        longCheckDiv("ldiv_by_pow2_36", ldiv_by_pow2_36(x), x, 68719476736l);
+        longCheckDiv("ldiv_by_pow2_37", ldiv_by_pow2_37(x), x, 137438953472l);
+        longCheckDiv("ldiv_by_pow2_38", ldiv_by_pow2_38(x), x, 274877906944l);
+        longCheckDiv("ldiv_by_pow2_39", ldiv_by_pow2_39(x), x, 549755813888l);
+        longCheckDiv("ldiv_by_pow2_40", ldiv_by_pow2_40(x), x, 1099511627776l);
+        longCheckDiv("ldiv_by_pow2_41", ldiv_by_pow2_41(x), x, 2199023255552l);
+        longCheckDiv("ldiv_by_pow2_42", ldiv_by_pow2_42(x), x, 4398046511104l);
+        longCheckDiv("ldiv_by_pow2_43", ldiv_by_pow2_43(x), x, 8796093022208l);
+        longCheckDiv("ldiv_by_pow2_44", ldiv_by_pow2_44(x), x, 17592186044416l);
+        longCheckDiv("ldiv_by_pow2_45", ldiv_by_pow2_45(x), x, 35184372088832l);
+        longCheckDiv("ldiv_by_pow2_46", ldiv_by_pow2_46(x), x, 70368744177664l);
+        longCheckDiv("ldiv_by_pow2_47", ldiv_by_pow2_47(x), x, 140737488355328l);
+        longCheckDiv("ldiv_by_pow2_48", ldiv_by_pow2_48(x), x, 281474976710656l);
+        longCheckDiv("ldiv_by_pow2_49", ldiv_by_pow2_49(x), x, 562949953421312l);
+        longCheckDiv("ldiv_by_pow2_50", ldiv_by_pow2_50(x), x, 1125899906842624l);
+        longCheckDiv("ldiv_by_pow2_51", ldiv_by_pow2_51(x), x, 2251799813685248l);
+        longCheckDiv("ldiv_by_pow2_52", ldiv_by_pow2_52(x), x, 4503599627370496l);
+        longCheckDiv("ldiv_by_pow2_53", ldiv_by_pow2_53(x), x, 9007199254740992l);
+        longCheckDiv("ldiv_by_pow2_54", ldiv_by_pow2_54(x), x, 18014398509481984l);
+        longCheckDiv("ldiv_by_pow2_55", ldiv_by_pow2_55(x), x, 36028797018963968l);
+        longCheckDiv("ldiv_by_pow2_56", ldiv_by_pow2_56(x), x, 72057594037927936l);
+        longCheckDiv("ldiv_by_pow2_57", ldiv_by_pow2_57(x), x, 144115188075855872l);
+        longCheckDiv("ldiv_by_pow2_58", ldiv_by_pow2_58(x), x, 288230376151711744l);
+        longCheckDiv("ldiv_by_pow2_59", ldiv_by_pow2_59(x), x, 576460752303423488l);
+        longCheckDiv("ldiv_by_pow2_60", ldiv_by_pow2_60(x), x, 1152921504606846976l);
+        longCheckDiv("ldiv_by_pow2_61", ldiv_by_pow2_61(x), x, 2305843009213693952l);
+        longCheckDiv("ldiv_by_pow2_62", ldiv_by_pow2_62(x), x, 4611686018427387904l);
+        longCheckDiv("ldiv_by_small_0", ldiv_by_small_0(x), x, 3l);
+        longCheckDiv("ldiv_by_small_1", ldiv_by_small_1(x), x, 5l);
+        longCheckDiv("ldiv_by_small_2", ldiv_by_small_2(x), x, 6l);
+        longCheckDiv("ldiv_by_small_3", ldiv_by_small_3(x), x, 7l);
+        longCheckDiv("ldiv_by_small_4", ldiv_by_small_4(x), x, 9l);
+        longCheckDiv("ldiv_by_small_5", ldiv_by_small_5(x), x, 10l);
+        longCheckDiv("ldiv_by_small_6", ldiv_by_small_6(x), x, 11l);
+        longCheckDiv("ldiv_by_small_7", ldiv_by_small_7(x), x, 12l);
+        longCheckDiv("ldiv_by_small_8", ldiv_by_small_8(x), x, 13l);
+        longCheckDiv("ldiv_by_small_9", ldiv_by_small_9(x), x, 14l);
+        longCheckDiv("ldiv_by_small_10", ldiv_by_small_10(x), x, 15l);
+        longCheckRem("lrem_by_pow2_0", lrem_by_pow2_0(x), x, 1l);
+        longCheckRem("lrem_by_pow2_1", lrem_by_pow2_1(x), x, 2l);
+        longCheckRem("lrem_by_pow2_2", lrem_by_pow2_2(x), x, 4l);
+        longCheckRem("lrem_by_pow2_3", lrem_by_pow2_3(x), x, 8l);
+        longCheckRem("lrem_by_pow2_4", lrem_by_pow2_4(x), x, 16l);
+        longCheckRem("lrem_by_pow2_5", lrem_by_pow2_5(x), x, 32l);
+        longCheckRem("lrem_by_pow2_6", lrem_by_pow2_6(x), x, 64l);
+        longCheckRem("lrem_by_pow2_7", lrem_by_pow2_7(x), x, 128l);
+        longCheckRem("lrem_by_pow2_8", lrem_by_pow2_8(x), x, 256l);
+        longCheckRem("lrem_by_pow2_9", lrem_by_pow2_9(x), x, 512l);
+        longCheckRem("lrem_by_pow2_10", lrem_by_pow2_10(x), x, 1024l);
+        longCheckRem("lrem_by_pow2_11", lrem_by_pow2_11(x), x, 2048l);
+        longCheckRem("lrem_by_pow2_12", lrem_by_pow2_12(x), x, 4096l);
+        longCheckRem("lrem_by_pow2_13", lrem_by_pow2_13(x), x, 8192l);
+        longCheckRem("lrem_by_pow2_14", lrem_by_pow2_14(x), x, 16384l);
+        longCheckRem("lrem_by_pow2_15", lrem_by_pow2_15(x), x, 32768l);
+        longCheckRem("lrem_by_pow2_16", lrem_by_pow2_16(x), x, 65536l);
+        longCheckRem("lrem_by_pow2_17", lrem_by_pow2_17(x), x, 131072l);
+        longCheckRem("lrem_by_pow2_18", lrem_by_pow2_18(x), x, 262144l);
+        longCheckRem("lrem_by_pow2_19", lrem_by_pow2_19(x), x, 524288l);
+        longCheckRem("lrem_by_pow2_20", lrem_by_pow2_20(x), x, 1048576l);
+        longCheckRem("lrem_by_pow2_21", lrem_by_pow2_21(x), x, 2097152l);
+        longCheckRem("lrem_by_pow2_22", lrem_by_pow2_22(x), x, 4194304l);
+        longCheckRem("lrem_by_pow2_23", lrem_by_pow2_23(x), x, 8388608l);
+        longCheckRem("lrem_by_pow2_24", lrem_by_pow2_24(x), x, 16777216l);
+        longCheckRem("lrem_by_pow2_25", lrem_by_pow2_25(x), x, 33554432l);
+        longCheckRem("lrem_by_pow2_26", lrem_by_pow2_26(x), x, 67108864l);
+        longCheckRem("lrem_by_pow2_27", lrem_by_pow2_27(x), x, 134217728l);
+        longCheckRem("lrem_by_pow2_28", lrem_by_pow2_28(x), x, 268435456l);
+        longCheckRem("lrem_by_pow2_29", lrem_by_pow2_29(x), x, 536870912l);
+        longCheckRem("lrem_by_pow2_30", lrem_by_pow2_30(x), x, 1073741824l);
+        longCheckRem("lrem_by_pow2_31", lrem_by_pow2_31(x), x, 2147483648l);
+        longCheckRem("lrem_by_pow2_32", lrem_by_pow2_32(x), x, 4294967296l);
+        longCheckRem("lrem_by_pow2_33", lrem_by_pow2_33(x), x, 8589934592l);
+        longCheckRem("lrem_by_pow2_34", lrem_by_pow2_34(x), x, 17179869184l);
+        longCheckRem("lrem_by_pow2_35", lrem_by_pow2_35(x), x, 34359738368l);
+        longCheckRem("lrem_by_pow2_36", lrem_by_pow2_36(x), x, 68719476736l);
+        longCheckRem("lrem_by_pow2_37", lrem_by_pow2_37(x), x, 137438953472l);
+        longCheckRem("lrem_by_pow2_38", lrem_by_pow2_38(x), x, 274877906944l);
+        longCheckRem("lrem_by_pow2_39", lrem_by_pow2_39(x), x, 549755813888l);
+        longCheckRem("lrem_by_pow2_40", lrem_by_pow2_40(x), x, 1099511627776l);
+        longCheckRem("lrem_by_pow2_41", lrem_by_pow2_41(x), x, 2199023255552l);
+        longCheckRem("lrem_by_pow2_42", lrem_by_pow2_42(x), x, 4398046511104l);
+        longCheckRem("lrem_by_pow2_43", lrem_by_pow2_43(x), x, 8796093022208l);
+        longCheckRem("lrem_by_pow2_44", lrem_by_pow2_44(x), x, 17592186044416l);
+        longCheckRem("lrem_by_pow2_45", lrem_by_pow2_45(x), x, 35184372088832l);
+        longCheckRem("lrem_by_pow2_46", lrem_by_pow2_46(x), x, 70368744177664l);
+        longCheckRem("lrem_by_pow2_47", lrem_by_pow2_47(x), x, 140737488355328l);
+        longCheckRem("lrem_by_pow2_48", lrem_by_pow2_48(x), x, 281474976710656l);
+        longCheckRem("lrem_by_pow2_49", lrem_by_pow2_49(x), x, 562949953421312l);
+        longCheckRem("lrem_by_pow2_50", lrem_by_pow2_50(x), x, 1125899906842624l);
+        longCheckRem("lrem_by_pow2_51", lrem_by_pow2_51(x), x, 2251799813685248l);
+        longCheckRem("lrem_by_pow2_52", lrem_by_pow2_52(x), x, 4503599627370496l);
+        longCheckRem("lrem_by_pow2_53", lrem_by_pow2_53(x), x, 9007199254740992l);
+        longCheckRem("lrem_by_pow2_54", lrem_by_pow2_54(x), x, 18014398509481984l);
+        longCheckRem("lrem_by_pow2_55", lrem_by_pow2_55(x), x, 36028797018963968l);
+        longCheckRem("lrem_by_pow2_56", lrem_by_pow2_56(x), x, 72057594037927936l);
+        longCheckRem("lrem_by_pow2_57", lrem_by_pow2_57(x), x, 144115188075855872l);
+        longCheckRem("lrem_by_pow2_58", lrem_by_pow2_58(x), x, 288230376151711744l);
+        longCheckRem("lrem_by_pow2_59", lrem_by_pow2_59(x), x, 576460752303423488l);
+        longCheckRem("lrem_by_pow2_60", lrem_by_pow2_60(x), x, 1152921504606846976l);
+        longCheckRem("lrem_by_pow2_61", lrem_by_pow2_61(x), x, 2305843009213693952l);
+        longCheckRem("lrem_by_pow2_62", lrem_by_pow2_62(x), x, 4611686018427387904l);
+    }
+
+    public static void main(String[] args) {
+      int i;
+      long l;
+
+      System.out.println("Begin");
+
+      System.out.println("Int: checking some equally spaced dividends...");
+      for (i = -1000; i < 1000; i += 300) {
+          intCheckAll(i);
+          intCheckAll(-i);
+      }
+
+      System.out.println("Int: checking small dividends...");
+      for (i = 1; i < 100; i += 1) {
+          intCheckAll(i);
+          intCheckAll(-i);
+      }
+
+      System.out.println("Int: checking big dividends...");
+      for (i = 0; i < 100; i += 1) {
+          intCheckAll(Integer.MAX_VALUE - i);
+          intCheckAll(Integer.MIN_VALUE + i);
+      }
+
+      System.out.println("Long: checking some equally spaced dividends...");
+      for (l = 0l; l < 1000000000000l; l += 300000000000l) {
+          longCheckAll(l);
+          longCheckAll(-l);
+      }
+
+      System.out.println("Long: checking small dividends...");
+      for (l = 1l; l < 100l; l += 1l) {
+          longCheckAll(l);
+          longCheckAll(-l);
+      }
+
+      System.out.println("Long: checking big dividends...");
+      for (l = 0l; l < 100l; l += 1l) {
+          longCheckAll(Long.MAX_VALUE - l);
+          longCheckAll(Long.MIN_VALUE + l);
+      }
+
+      System.out.println("End");
+    }
+}
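
The *_by_small_* methods cover the other classic strength reduction: dividing by a small non-power-of-two constant with a "magic number" multiply-high, in the style of Hacker's Delight. A sketch for 32-bit x / 3 (illustrative; not ART's actual code generation):

    # 32-bit Java-style x / 3 as a multiply-high plus a sign correction.
    def div3(x):  # assumes -2**31 <= x < 2**31
        M = 0x55555556                   # ceil(2**32 / 3), the magic constant
        q = (M * x) >> 32                # high half of the 64-bit product
        return q + (1 if x < 0 else 0)   # fix truncation for negative x

    assert div3(-100) == -33 and div3(100) == 33 and div3(-1) == 0
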
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index 16300bb..2b142db 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -203,6 +203,7 @@
 	ANDROID_ROOT=$(HOST_OUT) \
 	ANDROID_LOG_TAGS='*:d' \
 	LD_LIBRARY_PATH=$$($(2)ART_HOST_OUT_SHARED_LIBRARIES) \
+	LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) \
 	$(HOST_OUT_EXECUTABLES)/dalvikvm$$($(2)ART_PHONY_TEST_HOST_SUFFIX) $(DALVIKVM_FLAGS) $(5) \
 	    -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$$(HOST_CORE_IMG_LOCATION) \
 	    -classpath $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 25bcf0a..78312d1 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -21,26 +21,63 @@
 TEST_ART_RUN_TESTS := $(wildcard $(LOCAL_PATH)/[0-9]*)
 TEST_ART_RUN_TESTS := $(subst $(LOCAL_PATH)/,, $(TEST_ART_RUN_TESTS))
 
+# List all the test names for host and target, optionally with a -trace infix.
+# $(1): test name, e.g. 003-omnibus-opcodes
+# $(2): undefined or -trace
+define all-run-test-names
+  test-art-host-run-test$(2)-default-$(1)32 \
+  test-art-host-run-test$(2)-optimizing-$(1)32 \
+  test-art-host-run-test$(2)-interpreter-$(1)32 \
+  test-art-host-run-test$(2)-default-$(1)64 \
+  test-art-host-run-test$(2)-optimizing-$(1)64 \
+  test-art-host-run-test$(2)-interpreter-$(1)64 \
+  test-art-target-run-test$(2)-default-$(1)32 \
+  test-art-target-run-test$(2)-optimizing-$(1)32 \
+  test-art-target-run-test$(2)-interpreter-$(1)32 \
+  test-art-target-run-test$(2)-default-$(1)64 \
+  test-art-target-run-test$(2)-optimizing-$(1)64 \
+  test-art-target-run-test$(2)-interpreter-$(1)64
+endef  # all-run-test-names
+
 # Tests that are timing sensitive and flaky on heavily loaded systems.
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \
-  test-art-host-run-test-default-053-wait-some32 \
-  test-art-host-run-test-default-053-wait-some64 \
-  test-art-host-run-test-interpreter-053-wait-some32 \
-  test-art-host-run-test-interpreter-053-wait-some64 \
-  test-art-host-run-test-optimizing-053-wait-some32 \
-  test-art-host-run-test-optimizing-053-wait-some64 \
-  test-art-host-run-test-default-055-enum-performance32 \
-  test-art-host-run-test-default-055-enum-performance64 \
-  test-art-host-run-test-interpreter-055-enum-performance32 \
-  test-art-host-run-test-interpreter-055-enum-performance64 \
-  test-art-host-run-test-optimizing-055-enum-performance32 \
-  test-art-host-run-test-optimizing-055-enum-performance64
+  053-wait-some \
+  055-enum-performance
 
  # disable timing sensitive tests on "dist" builds.
 ifdef dist_goal
-  ART_TEST_KNOWN_BROKEN += $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS)
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace))
 endif
 
+# Tests that are broken in --trace mode.
+TEST_ART_BROKEN_TRACE_RUN_TESTS := \
+  003-omnibus-opcodes \
+  004-annotations \
+  018-stack-overflow \
+  023-many-interfaces \
+  031-class-attributes \
+  037-inherit \
+  044-proxy \
+  046-reflect \
+  051-thread \
+  055-enum-performance \
+  064-field-access \
+  078-polymorphic-virtual \
+  080-oom-throw \
+  082-inline-execute \
+  083-compiler-regressions \
+  093-serialization \
+  097-duplicate-method \
+  100-reflect2 \
+  102-concurrent-gc \
+  103-string-append \
+  107-int-math2 \
+  112-double-math \
+  701-easy-div-rem
+
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace))
+
 # The path where build only targets will be output, e.g.
 # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
 art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
@@ -96,9 +133,11 @@
 ART_TEST_HOST_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_HOST_RUN_TEST_ALL$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
@@ -124,8 +163,10 @@
 # $(2): host or target
 # $(3): default, optimizing or interpreter
 # $(4): 32 or 64
+# $(5): trace to run the test with tracing enabled, or undefined
 define define-test-art-run-test
   run_test_options := $(addprefix --runtime-option ,$(DALVIKVM_FLAGS))
+  run_test_rule_name := test-art-$(2)-run-test-$(3)-$(1)$(4)
   uc_host_or_target :=
   prereq_rule :=
   ifeq ($(2),host)
@@ -163,7 +204,14 @@
       $$(error found $(4) expected 32 or 64)
     endif
   endif
-  run_test_rule_name := test-art-$(2)-run-test-$(3)-$(1)$(4)
+  ifeq ($(5),trace)
+    run_test_options += --trace
+    run_test_rule_name := test-art-$(2)-run-test-trace-$(3)-$(1)$(4)
+  else
+    ifneq (,$(5))
+      $$(error found $(5) expected undefined or trace)
+    endif
+  endif
   run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \
     $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
@@ -222,9 +270,13 @@
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_INTERPRETER_$(1)_RULES :=
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_OPTIMIZING_$(1)_RULES :=
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)_RULES :=
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+  ifeq ($(2),host)
+    # For now just test tracing on the host with default.
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace))
+  endif
   do_second := false
   ifeq ($(2),host)
     ifneq ($$(HOST_PREFER_32_BIT),true)
@@ -236,9 +288,13 @@
     endif
   endif
   ifeq (true,$$(do_second))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
+    ifeq ($(2),host)
+      # For now just test tracing on the host with default.
+      $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace))
+    endif
   endif
 
   $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-default-$(1), \
@@ -319,6 +375,7 @@
 define-test-art-run-test :=
 define-test-art-run-test-group-rule :=
 define-test-art-run-test-group :=
+all-run-test-names :=
 ART_TEST_TARGET_RUN_TEST_ALL_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER_RULES :=
@@ -335,9 +392,11 @@
 ART_TEST_HOST_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_HOST_RUN_TEST_ALL$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
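
For readers tracing rule names, the all-run-test-names macro added above is a plain cross product over host/target, 32/64-bit, and the three compiler backends, with an optional -trace infix after "run-test". A Python sketch mirroring its expansion (illustrative only, not part of the build):

    def all_run_test_names(test, trace=''):
        # Mirrors the Make macro's ordering: host then target, 32 then 64,
        # default/optimizing/interpreter within each bitness.
        return ['test-art-%s-run-test%s-%s-%s%d' % (w, trace, c, test, b)
                for w in ('host', 'target')
                for b in (32, 64)
                for c in ('default', 'optimizing', 'interpreter')]

    # all_run_test_names('053-wait-some', '-trace')[0]
    # -> 'test-art-host-run-test-trace-default-053-wait-some32'
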
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index 87187ed..e5a17861 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -20,12 +20,10 @@
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "jni.h"
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index c849c54..e404f6a 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -19,12 +19,10 @@
 
 #include "class_linker.h"
 #include "gc_map.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "object_utils.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
 
diff --git a/tools/generate-operator-out.py b/tools/generate-operator-out.py
index 6baa6e3..f666ad1 100755
--- a/tools/generate-operator-out.py
+++ b/tools/generate-operator-out.py
@@ -163,35 +163,35 @@
     header_files.append(header_file)
     ProcessFile(header_file)
 
-  print '#include <iostream>'
-  print
+  print('#include <iostream>')
+  print('')
 
   for header_file in header_files:
     header_file = header_file.replace(local_path + '/', '')
-    print '#include "%s"' % header_file
+    print('#include "%s"' % header_file)
 
-  print
+  print('')
 
   for enum_name in _ENUMS:
-    print '// This was automatically generated by %s --- do not edit!' % sys.argv[0]
+    print('// This was automatically generated by %s --- do not edit!' % sys.argv[0])
 
     namespaces = _NAMESPACES[enum_name].split('::')
     for namespace in namespaces:
-      print 'namespace %s {' % namespace
+      print('namespace %s {' % namespace)
 
-    print 'std::ostream& operator<<(std::ostream& os, const %s& rhs) {' % enum_name
-    print '  switch (rhs) {'
+    print('std::ostream& operator<<(std::ostream& os, const %s& rhs) {' % enum_name)
+    print('  switch (rhs) {')
     for (enum_value, enum_text) in _ENUMS[enum_name]:
-      print '    case %s: os << "%s"; break;' % (enum_value, enum_text)
+      print('    case %s: os << "%s"; break;' % (enum_value, enum_text))
     if not _ENUM_CLASSES[enum_name]:
-      print '    default: os << "%s[" << static_cast<int>(rhs) << "]"; break;' % enum_name
-    print '  }'
-    print '  return os;'
-    print '}'
+      print('    default: os << "%s[" << static_cast<int>(rhs) << "]"; break;' % enum_name)
+    print('  }')
+    print('  return os;')
+    print('}')
 
     for namespace in reversed(namespaces):
-      print '}  // namespace %s' % namespace
-    print
+      print('}  // namespace %s' % namespace)
+    print('')
 
   sys.exit(0)
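
The conversions above deliberately use single-argument print calls, which behave the same under Python 2 (where the parentheses just group the statement's operand) and Python 3; note print('') rather than a bare print(), since print() under Python 2 would emit a literal (). An equivalent alternative (a sketch, not what this patch does) is to opt into the function form explicitly:

    # Alternative: make print a real function on Python 2 as well.
    from __future__ import print_function

    print('#include <iostream>')
    print()  # blank line; same effect as the patch's print('')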